***Diss File JK
***Created 06/02/2022
***Updated 10/04/2022  

********************************************************************************
*Do file to clean and prepare Talk of Europe data 1999-2013
********************************************************************************

*Set file paths directory
*Absolute paths on the author's machine; adjust these five globals before
*running the do-file on another computer.
*raw  : scraped CSV files that are read with import delimited
*data : Stata datasets that this do-file saves and re-uses
*gra, tab, do : defined here but not used in this part of the do-file
global raw "/Users/janakonle/Documents/Studium/Dissertation/STATA/raw"
global data "/Users/janakonle/Documents/Studium/Dissertation/STATA/data"
global gra "/Users/janakonle/Documents/Studium/Dissertation/STATA/graphs"
global tab "/Users/janakonle/Documents/Studium/Dissertation/STATA/tables"
global do "/Users/janakonle/Documents/Studium/Dissertation/STATA/do"

********************************************************************************
*Parliamentary term 5 (20/7/1999-19/7/2004)
********************************************************************************
*Dictionary topics of term 5
*The six topics are prepared in exactly the same way, so they are processed in
*one loop instead of six copy-pasted sections. Topic codes:
*   dv  = Domestic Violence
*   ch  = Childcare
*   lm  = Labour Market
*   cj  = Discrimination (formerly 'Criminal Justice')
*   wh  = Women's Health
*   rep = Representation
*Per topic there are two raw files: the committee file (suffix com, yields
*start_FEMM and end_FEMM) and the party file (suffix party, yields start_MP
*and end_MP). Output per topic: 5_<topic>_com.dta, 5_<topic>_party.dta and the
*merged 5_<topic>.dta.
/*Data is scraped from TalkofEurope LinkedOpenData: 
https://linkedpolitics.project.cwi.nl/web/html/home.html*/

foreach topic in dv ch lm cj wh rep {

	//(1) Open data and (2) prepare both files for the merge.
	//The committee file is done first so that the party file is the one
	//left in memory when the two are merged.
	foreach source in com party {

		//names of the raw date columns and suffix of the derived variables
		if "`source'" == "com" {
			local rawstart start_com
			local rawend   end_com
			local suffix   FEMM
		}
		else {
			local rawstart start
			local rawend   end
			local suffix   MP
		}

		import delimited "$raw/5_`topic'_`source'.csv", varnames(1) clear
		drop v12

		//show how many speeches occur more than once before cleaning
		duplicates report speechnr

		//Change start and end date due to duplicates here already:
		//earliest start and latest end over all rows of a speech
		generate start_num = date(`rawstart', "YMD")
		bysort speechnr (start_num): generate start_`suffix' = start_num[1]
		generate end_num = date(`rawend', "YMD")
		by speechnr: egen end_`suffix' = max(end_num)
		format start_`suffix' end_`suffix' %td

		//keep only the rows that carry the earliest start date
		drop if start_num != start_`suffix'
		drop `rawstart' `rawend' start_num end_num

		//show whether duplicates are left after cleaning
		duplicates report speechnr

		save "$data/5_`topic'_`source'.dta", replace
	}

	//Merging: party data in memory, committee data from disk.
	//The merge table printed by Stata is the check that the merge worked.
	merge 1:1 speechnr date using "$data/5_`topic'_com.dta"
	drop _merge

	//Dictionary dummy: flag all speeches that come from the files of this topic
	generate `topic' = 1
	label define `topic'_term 0 "No" 1 "Yes"
	label values `topic' `topic'_term

	save "$data/5_`topic'.dta", replace
}

********************************************************************************

*Merge all
*(1) Open data: the Domestic Violence file is the starting point
use "$data/5_dv.dta", clear

*(2) Merge all datasets: add the remaining topic files one after another on
*speech number and date; the merge table printed each time is the check that
*the merge worked
foreach topic in ch lm cj wh rep {
	merge 1:1 speechnr date using "$data/5_`topic'.dta"
	drop _merge
}

*Remove the existing dictionary variable (presumably the column delivered with
*the raw files - TODO confirm); it is rebuilt as a dummy in step (3)
drop dictionary

*Checking for duplicates (the tag is only created for inspection)
duplicates tag speechnr, generate(speech)
drop speech

*(3) Dictionary dummy: every speech in this file belongs to at least one topic
generate dictionary = 1
label define dictionary_term 0 "No" 1 "Yes"
label values dictionary dictionary_term

*(4) Date to merge later: replace the string date by a numeric daily date
generate date_num = date(date, "YMD")
drop date
rename date_num date
format date %td

save "$data/5_dictionary.dta", replace

********************************************************************************

*Append different parts of all speeches in term 5
*Start from an empty dataset
clear
*Set file paths directory
*Same five globals as at the top of the do-file (presumably repeated so that
*this part can be run on its own - TODO confirm)
global raw "/Users/janakonle/Documents/Studium/Dissertation/STATA/raw"
global data "/Users/janakonle/Documents/Studium/Dissertation/STATA/data"
global gra "/Users/janakonle/Documents/Studium/Dissertation/STATA/graphs"
global tab "/Users/janakonle/Documents/Studium/Dissertation/STATA/tables"
global do "/Users/janakonle/Documents/Studium/Dissertation/STATA/do"

*Prepare the five part files of all term 5 speeches (5_part1 to 5_part5).
*The parts are treated identically, so they are processed in one loop; only
*the duplicate count that identifies the speeches to discard differs by part.

*Value of the duplicates tag (number of surplus copies of a speech) that is
*discarded in part 1, 2, 3, 4 and 5 respectively
local discard 18 14 18 19 16
/*Part 1: These have weirdly been added thrice. First duplicate removal here.
The speech takes place during Hans-Peter Martin being independent
("parteilos"), not as Liste..., these later observations will be removed in
the date process.
Parts 2 to 5: Liste, no Dates available, thus deleted.*/

/*there is a few interesting cases here where parties have been dissolved later
and new follower parties were founded. I have decided to go with the earliest
start date because the point in the regression will be to see whether the MP
experience has helped the MEP to SRWI.*/

forvalues part = 1/5 {
	local n_copies : word `part' of `discard'

	import delimited "$raw/5_part`part'.csv", varnames(1) clear
	drop v12 v13 v14

	//number of surplus copies per speech; identical for all rows of a speech
	duplicates tag speechnr, generate(speech)

	//list who is affected, then discard these speeches completely.
	//Whole speeches are removed, so the by-speech dates of the remaining
	//speeches computed below are not affected by doing this first.
	capture noisily tabulate name if speech == `n_copies'
	drop if speech == `n_copies'

	//Change start date due to duplicates here already:
	//earliest start and latest end over all rows of a speech
	generate start_num = date(start, "YMD")
	bysort speechnr (start_num): generate start_MP = start_num[1]
	generate end_num = date(end, "YMD")
	by speechnr: egen end_MP = max(end_num)
	format start_MP end_MP %td

	//numeric speech date
	generate date_num = date(date, "YMD")
	format date_num %td
	drop date
	rename date_num date

	//keep the membership row that covers the day of the speech; a missing
	//end date counts as larger than any date and is therefore kept.
	//NOTE(review): both comparisons are strict, so a speech held exactly on
	//the first or last day of a membership spell is dropped - confirm that
	//this is intended.
	keep if end_num > date & start_num < date
	drop start end start_num end_num speech

	//show whether duplicates are left (success in removal of other duplicates)
	duplicates report speechnr

	save "$data/5_part`part'.dta", replace
}

*Stack the five prepared part files into one party dataset for term 5.
*The file names after -append using- are separated by blanks as in the
*documented syntax (they were previously glued together without blanks).
use "$data/5_part1.dta", clear
append using "$data/5_part2.dta" "$data/5_part3.dta" ///
	"$data/5_part4.dta" "$data/5_part5.dta"
save "$data/5_all_party.dta", replace

*Show whether a speech occurs more than once across the parts
duplicates report speechnr

*Include rest speeches from Term 5 
/*Data is scraped from TalkofEurope LinkedOpenData: 
https://linkedpolitics.project.cwi.nl/web/html/home.html*/
*(1) Open data
import delimited "$raw/5_all_com.csv", varnames(1) clear

*(2) Merge party & com data
*Preparing to merge - Committee dataset
drop v12
duplicates tag speechnr, generate(speech)

*Change start date due to duplicates here already:
*earliest committee start and latest committee end over all rows of a speech
generate start = date(start_com, "YMD")
drop start_com
bysort speechnr (start): generate start_FEMM = start[1]

generate end = date(end_com, "YMD")
drop end_com
by speechnr: egen end_FEMM = max(end)
format start_FEMM end_FEMM %td

*Keep only the rows that carry the earliest start date
keep if start == start_FEMM
drop speech start end

*Tag created for inspection only
duplicates tag speechnr, generate(speech)
drop speech

*Numeric speech date (merge key together with speechnr)
generate date_num = date(date, "YMD")
drop date
rename date_num date
format date %td

save "$data/5_all_com.dta", replace
clear

*(3) Merging all Term 5 data
*Party data in memory, committee data added on speech number and date; the
*merge table printed by Stata is the check that the merge worked
use "$data/5_all_party.dta", clear
merge 1:1 speechnr date using "$data/5_all_com.dta", nogenerate

*The dictionary variable is taken from the dictionary file instead
drop dictionary

*Add the topic dummies and the dictionary dummy
merge 1:1 speechnr date using "$data/5_dictionary.dta"

*Tag created for inspection only
duplicates tag speechnr, generate(speech)
drop speech
/*
tab name if _merge == 2
drop if _merge == 2	
*/
drop _merge

* Dictionary dummy: speeches that are not in the dictionary file get 0
replace dictionary = 0 if dictionary == .

save "$data/5_all.dta", replace

********************************************************************************
*(2) Encoding 
*Dictionary: a missing topic dummy means the speech was not in that topic
*file, so it is set to 0
foreach topic of varlist dv ch lm cj wh rep {
	replace `topic' = 0 if `topic' == .
}

*Gender
*encode numbers the string categories in alphabetical order, so the recode
*below assumes the two categories come out as 1 = female and 2 = male
*(check against the two tab outputs)
encode gender, gen(gender_num)
	drop gender
	rename gender_num female
	tab female
	tab female, nolab
	replace female = 0 if female == 2

label define gender_type 0 "male" 1 "female"
label values female gender_type

*FEMM Committee
*Assumes femm holds a single non-missing category that encode turns into 1
*(check against the two tab outputs); speeches without an entry become 0
encode femm, gen(femm_num)
	drop femm 
	rename femm_num femm
	tab femm
	tab femm, nolab 
	replace femm = 0 if femm == . 

label define femm_type 0 "No" 1 "Yes"
label values femm femm_type 

*(3) Changing the dates
generate birth_num = date(birth, "YMD")
format birth_num %td
	drop birth
	rename birth_num birthday

*(4) Generating new variables 
*The headers below are plain * comments: a line starting with /// is Stata's
*line-join comment and can pull the command on the next line into the comment.

*Proportion of Women in the EP 
gen women_ep = 30.19
/*Data from: https://op.europa.eu/en/publication-detail/-/publication/1f2a7ac7-d8f7-11e9-9c4e-01aa75ed71a1/language-en*/ 

*Age at time of Speech
gen age = age(birthday, date)

*EP Membership duration at time of speech (in months)
gen mep_dur = datediff_frac(start_MP, date, "month")

*FEMM Membership duration at time of speech (in months)
*A negative duration means the speech was held before the FEMM membership
*started: not a member at that time and no duration
gen femm_dur = datediff_frac(start_FEMM, date, "month")
replace femm = 0 if femm_dur < 0
replace femm_dur = . if femm_dur < 0 

*Parlterm Dummy
gen parlterm_5 = 1 

label define p5 0 "No" 1 "Yes"
label values parlterm_5 p5 

*(5) Label variables 
label var name "Name of MEP"
label var partyname "Name of Party"
label var speechnr "Number of individual Parliamentary Speech"
label var parlterm "Parliamentary Term"
label var dv "Domestic Violence Terms"
label var ch "Childcare Terms"
label var lm "Labour Market Terms"
*The Discrimination dummy is stored as cj (formerly 'Criminal Justice')
label var cj "Discrimination Terms"
label var wh "Women's Health Terms"
label var rep "Representation Terms"
label var dictionary "Type of Dictionary Terms discussed"
label var country "Country"
label var female "Gender of MEP"
label var femm "Membership Committee on Women's Rights and Equal Opportunities"
label var parlterm_5 "Fifth Parliamentary Term Dummy"
label var age "Age of MP at Time of Speech"
label var date "Date"
label var birthday "Birthday"
label var start_MP "Start of Membership in Term"
label var end_MP "End of Membership in Term"
label var start_FEMM "Start of Membership in FEMM"
label var end_FEMM "End of Membership in FEMM in Term"
label var femm_dur "Duration of FEMM Membership at Time of Speech"
label var mep_dur "Duration of EP Membership at Time of Speech"
label var women_ep "Proportion of Women in the European Parliament in %"
********************************************************************************
*(6) Output the clean dataset
save "$data/5_all.dta", replace 
													
********************************************************************************
*Parliamentary term 6 (20/7/2004-13/7/2009)
********************************************************************************
*Dictionary topics of term 6
*The topics are prepared in exactly the same way, so they are processed in
*one loop instead of copy-pasted sections. Topic codes:
*   dv  = Domestic Violence
*   ch  = Childcare
*   lm  = Labour Market
*   cj  = Discrimination
*   wh  = Women's Health
*Representation (rep) keeps its own section below this loop.
*Per topic there are two raw files: the committee file (suffix com, yields
*start_FEMM and end_FEMM) and the party file (suffix party, yields start_MP
*and end_MP). Output per topic: 6_<topic>_com.dta, 6_<topic>_party.dta and the
*merged 6_<topic>.dta.
/*Data is scraped from TalkofEurope LinkedOpenData: 
https://linkedpolitics.project.cwi.nl/web/html/home.html*/

foreach topic in dv ch lm cj wh {

	//(1) Open data and (2) prepare both files for the merge.
	//The committee file is done first so that the party file is the one
	//left in memory when the two are merged.
	foreach source in com party {

		//names of the raw date columns and suffix of the derived variables
		if "`source'" == "com" {
			local rawstart start_com
			local rawend   end_com
			local suffix   FEMM
		}
		else {
			local rawstart start
			local rawend   end
			local suffix   MP
		}

		import delimited "$raw/6_`topic'_`source'.csv", varnames(1) clear
		drop v12

		//show how many speeches occur more than once before cleaning
		duplicates report speechnr

		//Change start and end date due to duplicates here already:
		//earliest start and latest end over all rows of a speech
		generate start_num = date(`rawstart', "YMD")
		bysort speechnr (start_num): generate start_`suffix' = start_num[1]
		generate end_num = date(`rawend', "YMD")
		by speechnr: egen end_`suffix' = max(end_num)
		format start_`suffix' end_`suffix' %td

		//keep only the rows that carry the earliest start date
		drop if start_num != start_`suffix'
		drop `rawstart' `rawend' start_num end_num

		//show whether duplicates are left after cleaning
		duplicates report speechnr

		save "$data/6_`topic'_`source'.dta", replace
	}

	//Merging: party data in memory, committee data from disk.
	//The merge table printed by Stata is the check that the merge worked.
	merge 1:1 speechnr date using "$data/6_`topic'_com.dta"
	drop _merge

	//Dictionary dummy: flag all speeches that come from the files of this topic
	generate `topic' = 1
	label define `topic'_term 0 "No" 1 "Yes"
	label values `topic' `topic'_term

	save "$data/6_`topic'.dta", replace
}

********************************************************************************
*Representation 
*(1) Open data
import delimited "$raw/6_rep_com.csv", varnames(1) clear 
/*Data is scraped from TalkofEurope LinkedOpenData: 
https://linkedpolitics.project.cwi.nl/web/html/home.html*/

*(2) Merge party & com data
*Preparing to merge - Committee dataset
drop v12
duplicates tag speechnr, generate(speech)
*Change start date due to duplicates here already 
generate start_num = date(start_com, "YMD")
format start_num %td
	drop start_com
	rename start_num start
	
by speechnr (start), sort: gen start_FEMM = start[1] 
format start_FEMM %td

generate end_num = date(end_com, "YMD")
format end_num %td
	drop end_com
	rename end_num end
	
by speechnr: egen end_FEMM = max(end)
format end_FEMM %td

drop if start != start_FEMM
drop start end speech 

duplicates tag speechnr, generate(speech)
drop speech

save "$data/6_rep_com.dta",replace	
clear

*Preparing to merge - Party dataset
import delimited "$raw/6_rep_party.csv", varnames(1) clear 
drop v12

duplicates tag speechnr, generate(speech)
*Change start date due to duplicates here already 
generate start_num = date(start, "YMD")
format start_num %td
	drop start
	rename start_num start
	
by speechnr (start), sort: gen start_MP = start[1] 
format start_MP %td

generate end_num = date(end, "YMD")
format end_num %td
	drop end
	rename end_num end
	
by speechnr: egen end_MP = max(end)
format end_MP %td

drop if start != start_MP
drop start end speech 

duplicates tag speechnr, generate(speech)
drop speech

save "$data/6_rep_party.dta",replace	

*Merging 
merge 1:1 speechnr date using "$data/6_rep_com.dta" 							/*Checking the merge worked*/
drop _merge 

* Dictionary dummy
gen rep = 1 

label define ///
	rep_term ///
	0 "No" ///
	1 "Yes"

label values rep rep_term 

save "$data/6_rep.dta", replace 

********************************************************************************

*Merge all
*(1) Open data: start from the dv dataset
use "$data/6_dv.dta", clear

*(2) Merge all remaining dictionary datasets, one after the other
*Checking the merge worked: inspect the merge table printed for each topic
foreach topic in ch lm cj wh rep {
	merge 1:1 speechnr date using "$data/6_`topic'.dta"
	drop _merge
}

*The dictionary variable is dropped here and rebuilt as a dummy below
drop dictionary

*Checking for duplicates
duplicates tag speechnr, generate(speech)
drop speech

*(3)Dictionary dummy: every speech in this dataset matched at least one dictionary
gen dictionary = 1
label define dictionary_term 0 "No" 1 "Yes"
label values dictionary dictionary_term

save "$data/6_dictionary.dta", replace

********************************************************************************

*Append different parts of all speeches in term 6
clear
*Set file paths directory
global raw "/Users/janakonle/Documents/Studium/Dissertation/STATA/raw"
global data "/Users/janakonle/Documents/Studium/Dissertation/STATA/data"
global gra "/Users/janakonle/Documents/Studium/Dissertation/STATA/graphs"
global tab "/Users/janakonle/Documents/Studium/Dissertation/STATA/tables"
global do "/Users/janakonle/Documents/Studium/Dissertation/STATA/do"

*The five raw parts are cleaned in exactly the same way; the only difference is
*the duplicate count that identifies the "Liste" rows in each part.
*Position k in this list belongs to 6_partk.csv.
local liste_counts 43 30 15 27 23

forvalues part = 1/5 {
	local liste_n : word `part' of `liste_counts'

	import delimited "$raw/6_part`part'.csv", varnames(1) clear
	drop v12 v13 v14

	*speech = number of surplus rows sharing the same speechnr
	duplicates tag speechnr, generate(speech)

	*Change start date due to duplicates here already
	generate start_num = date(start, "YMD")
	format start_num %td
	drop start
	rename start_num start

	*Earliest start date among the rows of each speech
	by speechnr (start), sort: gen start_MP = start[1]
	format start_MP %td

	generate end_num = date(end, "YMD")
	format end_num %td
	drop end
	rename end_num end

	*Latest end date among the rows of each speech
	by speechnr: egen end_MP = max(end)
	format end_MP %td

	*Liste, no dates available, thus deleted
	drop if speech == `liste_n'

	*There are a few interesting cases here where parties were dissolved later
	*and new follower parties were founded. The earliest start date is used
	*because the point in the regression will be to see whether the MP
	*experience has helped the MEP to SRWI.
	generate date_num = date(date, "YMD")
	format date_num %td
	drop date
	rename date_num date

	*Keep the row whose membership spell covers the speech date
	*NOTE(review): the strict inequalities also drop speeches held exactly on
	*the start or end date - confirm this is intended
	keep if end > date & start < date
	drop start end speech

	*Tag again to check for remaining duplicates, then discard the tag
	duplicates tag speechnr, generate(speech)
	drop speech

	save "$data/6_part`part'.dta", replace
}

*Stack the five cleaned parts into one party dataset for term 6
use "$data/6_part1.dta", clear
*Filenames are separated by blanks, as the append syntax expects
append using "$data/6_part2.dta" "$data/6_part3.dta" "$data/6_part4.dta" "$data/6_part5.dta"
save "$data/6_all_party.dta", replace

*Check the appended data for duplicate speeches (tag is inspected, then dropped)
duplicates tag speechnr, generate(speech)
drop speech

*Include rest of the speeches from Term 6
*(1) Open data
*Data is scraped from TalkofEurope LinkedOpenData:
*https://linkedpolitics.project.cwi.nl/web/html/home.html
import delimited "$raw/6_all_com.csv", varnames(1) clear

*(2) Merge party & com data
*Preparing to merge - Committee dataset
drop v12
duplicates tag speechnr, generate(speech)

*Turn the string committee start date into a daily date (done here already because of the duplicates)
gen start_num = date(start_com, "YMD")
format start_num %td
drop start_com
rename start_num start

*Earliest committee start date among the rows of each speech
bysort speechnr (start): gen start_FEMM = start[1]
format start_FEMM %td

*Turn the string committee end date into a daily date
gen end_num = date(end_com, "YMD")
format end_num %td
drop end_com
rename end_num end

*Latest committee end date among the rows of each speech
by speechnr: egen end_FEMM = max(end)
format end_FEMM %td

*Keep only the rows that carry the earliest start date
keep if start == start_FEMM
drop start end speech

*Tag again to check for remaining duplicates, then discard the tag
duplicates tag speechnr, generate(speech)
drop speech

*Turn the string speech date into a daily date
gen date_num = date(date, "YMD")
format date_num %td
drop date
rename date_num date

save "$data/6_all_com.dta", replace

*Preparing to merge
*The dictionary dataset still holds the speech date as a string; convert it to a
*daily date and overwrite the file
use "$data/6_dictionary.dta", clear
gen date_num = date(date, "YMD")
format date_num %td
drop date
rename date_num date
save "$data/6_dictionary.dta", replace

*(3) Merging all Term 6 data
use "$data/6_all_party.dta", clear

*Party data + committee data
merge 1:1 speechnr date using "$data/6_all_com.dta"
*Checking the merge worked: names found only in the committee data
tab name if _merge == 2
drop _merge dictionary

*Add the dictionary speeches
merge 1:1 speechnr date using "$data/6_dictionary.dta"
duplicates tag speechnr, generate(speech)
drop speech
*Names found only in the dictionary data
tab name if _merge == 2
drop _merge

* Dictionary dummy: speeches without a dictionary match get 0
replace dictionary = 0 if dictionary == .

save "$data/6_all.dta", replace

********************************************************************************
*(2) Encoding
*Dictionary: topic dummies are missing for speeches outside that dictionary; set them to 0
foreach topic in dv ch lm cj wh rep {
	replace `topic' = 0 if `topic' == .
}

*Gender
*encode numbers the string values; the tab output shows the resulting codes.
*Code 2 is recoded to 0 so that the variable becomes a 0/1 dummy
*(assumes encode yields 1 = female, 2 = male - check the tab output)
encode gender, gen(gender_num)
drop gender
rename gender_num female
tab female
tab female, nolab
replace female = 0 if female == 2

label define gender_type 0 "male" 1 "female"
label values female gender_type

*FEMM Committee
*Speeches without a FEMM entry are missing after encode and are set to 0
encode femm, gen(femm_num)
drop femm
rename femm_num femm
tab femm
tab femm, nolab
replace femm = 0 if femm == .

label define femm_type 0 "No" 1 "Yes"
label values femm femm_type

*(3) Changing the dates
generate birth_num = date(birth, "YMD")
format birth_num %td
drop birth
rename birth_num birthday

*(4) Generating new variables
*Proportion of Women in the EP
*Data from: https://op.europa.eu/en/publication-detail/-/publication/1f2a7ac7-d8f7-11e9-9c4e-01aa75ed71a1/language-en
gen women_ep = 31.21

*Age at time of Speech
gen age = age(birthday, date)

*EP Membership duration at time of speech (in months)
gen mep_dur = datediff_frac(start_MP, date, "month")

*FEMM Membership duration at time of speech (in months)
*A negative duration means the speech was held before the FEMM membership
*started, so the MEP is not counted as a FEMM member for that speech
gen femm_dur = datediff_frac(start_FEMM, date, "month")
replace femm = 0 if femm_dur < 0
replace femm_dur = . if femm_dur < 0

*Parlterm Dummy
gen parlterm_6 = 1

label define p6 0 "No" 1 "Yes"
label values parlterm_6 p6

*(5) Label variables
label var name "Name of MEP"
label var partyname "Name of Party"
label var speechnr "Number of individual Parliamentary Speech"
label var parlterm "Parliamentary Term"
label var dv "Domestic Violence Terms"
label var ch "Childcare Terms"
label var lm "Labour Market Terms"
*The discrimination dummy is called cj in this dataset
label var cj "Discrimination Terms"
label var wh "Women's Health Terms"
label var rep "Representation Terms"
label var dictionary "Type of Dictionary Terms discussed"
label var country "Country"
label var female "Gender of MEP"
label var femm "Membership Committee on Women's Rights and Equal Opportunities"
label var parlterm_6 "Sixth Parliamentary Term Dummy"
label var age "Age of MP at Time of Speech"
label var date "Date"
label var birthday "Birthday"
label var start_MP "Start of Membership in Term"
label var end_MP "End of Membership in Term"
label var start_FEMM "Start of Membership in FEMM"
label var end_FEMM "End of Membership in FEMM in Term"
label var femm_dur "Duration of FEMM Membership at Time of Speech"
label var mep_dur "Duration of EP Membership at Time of Speech"
label var women_ep "Proportion of Women in the European Parliament in %"
********************************************************************************
*(6) Output the clean dataset
save "$data/6_all.dta", replace 
													
********************************************************************************
*Parliamentary term 7 (14/7/2009-30/6/2014)
********************************************************************************
*Dictionary datasets of term 7:
*Domestic Violence (dv), Childcare (ch), Labour Market (lm),
*Discrimination (cj), Women's Health (wh), Representation (rep)
*All six go through exactly the same steps, so they are handled in one loop.
*Data is scraped from TalkofEurope LinkedOpenData:
*https://linkedpolitics.project.cwi.nl/web/html/home.html
foreach topic in dv ch lm cj wh rep {

	*(1) Open data
	import delimited "$raw/7_`topic'_com.csv", varnames(1) clear

	*(2) Merge party & com data
	*Preparing to merge - Committee dataset
	drop v12
	duplicates tag speechnr, generate(speech)

	*Change start and end date due to duplicates here already
	generate start_num = date(start_com, "YMD")
	format start_num %td
	drop start_com
	rename start_num start

	*Earliest committee start date among the rows of each speech
	by speechnr (start), sort: gen start_FEMM = start[1]
	format start_FEMM %td

	generate end_num = date(end_com, "YMD")
	format end_num %td
	drop end_com
	rename end_num end

	*Latest committee end date among the rows of each speech
	by speechnr: egen end_FEMM = max(end)
	format end_FEMM %td

	*Keep only the rows that carry the earliest start date
	drop if start != start_FEMM
	drop start end speech

	*Tag again to check for remaining duplicates, then discard the tag
	duplicates tag speechnr, generate(speech)
	drop speech

	save "$data/7_`topic'_com.dta", replace
	clear

	*Preparing to merge - Party dataset
	import delimited "$raw/7_`topic'_party.csv", varnames(1) clear
	drop v12 v13 v14

	duplicates tag speechnr, generate(speech)

	*Change start date due to duplicates here already
	generate start_num = date(start, "YMD")
	format start_num %td
	drop start
	rename start_num start

	*Earliest start date among the rows of each speech
	by speechnr (start), sort: gen start_MP = start[1]
	format start_MP %td

	generate end_num = date(end, "YMD")
	format end_num %td
	drop end
	rename end_num end

	*Latest end date among the rows of each speech
	by speechnr: egen end_MP = max(end)
	format end_MP %td

	drop if start != start_MP
	drop start end speech

	duplicates tag speechnr, generate(speech)
	*Speeches without any start date are removed
	drop if start_MP ==.
	drop speech

	save "$data/7_`topic'_party.dta", replace

	*Merging (check the merge table in the output to see that it worked)
	merge 1:1 speechnr date using "$data/7_`topic'_com.dta"
	drop _merge

	*Dictionary dummy, named after the topic code
	gen `topic' = 1
	label define `topic'_term 0 "No" 1 "Yes"
	label values `topic' `topic'_term

	save "$data/7_`topic'.dta", replace
}

********************************************************************************

*Merge all
*(1) Open data: start from the dv dataset
use "$data/7_dv.dta", clear

*(2) Merge all remaining dictionary datasets, one after the other
*Checking the merge worked: inspect the merge table printed for each topic
foreach topic in ch lm cj wh rep {
	merge 1:1 speechnr date using "$data/7_`topic'.dta"
	drop _merge
}

*The dictionary variable is dropped here and rebuilt as a dummy below
drop dictionary

*Checking for duplicates
duplicates tag speechnr, generate(speech)
drop speech

*(3)Dictionary dummy: every speech in this dataset matched at least one dictionary
gen dictionary = 1
label define dictionary_term 0 "No" 1 "Yes"
label values dictionary dictionary_term

save "$data/7_dictionary.dta", replace

********************************************************************************

*Append different parts of all speeches in term 7
clear
*Set file paths directory
global raw "/Users/janakonle/Documents/Studium/Dissertation/STATA/raw"
global data "/Users/janakonle/Documents/Studium/Dissertation/STATA/data"
global gra "/Users/janakonle/Documents/Studium/Dissertation/STATA/graphs"
global tab "/Users/janakonle/Documents/Studium/Dissertation/STATA/tables"
global do "/Users/janakonle/Documents/Studium/Dissertation/STATA/do"

import delimited "$raw/7_part1.csv", varnames(1) clear
drop v12 v13 v14 
duplicates tag speechnr, generate(speech)
*Change start date due to duplicates here already 
generate start_num = date(start, "YMD")
format start_num %td
	drop start
	rename start_num start
	
by speechnr (start), sort: gen start_MP = start[1] 
format start_MP %td

generate end_num = date(end, "YMD")
format end_num %td
	drop end
	rename end_num end
	
by speechnr: egen end_MP = max(end)
format end_MP %td

drop if start ==.
																				/*Liste, no Dates available, thus deleted*/
																				/*there is a few interesting cases here where parties have been dissolved later and new follower parties were founded. I have decided to go with the earliest start date because the point in the regression will be to see whether the MP experience has helped the MEP to SRWI.*/
generate date_num = date(date, "YMD")
format date_num %td
	drop date 
	rename date_num date

keep if end > date & start < date
drop start end speech 

duplicates tag speechnr, generate(speech)
drop speech
																					
save "$data/7_part1.dta", replace

import delimited "$raw/7_part2.csv", varnames(1) clear
drop v12 v13 v14 
duplicates tag speechnr, generate(speech)
*Change start date due to duplicates here already 
generate start_num = date(start, "YMD")
format start_num %td
	drop start
	rename start_num start
	
by speechnr (start), sort: gen start_MP = start[1] 
format start_MP %td

generate end_num = date(end, "YMD")
format end_num %td
	drop end
	rename end_num end
	
by speechnr: egen end_MP = max(end)
format end_MP %td

drop if start ==.																/*Liste, no Dates available, thus deleted*/
																				/*there is a few interesting cases here where parties have been dissolved later and new follower parties were founded. I have decided to go with the earliest start date because the point in the regression will be to see whether the MP experience has helped the MEP to SRWI.*/
generate date_num = date(date, "YMD")
format date_num %td
	drop date 
	rename date_num date

keep if end > date & start < date
drop start end speech 

duplicates tag speechnr, generate(speech)
drop speech

save "$data/7_part2.dta", replace

import delimited "$raw/7_part3.csv", varnames(1) clear
drop v12 v13 v14 
duplicates tag speechnr, generate(speech)
*Change start date due to duplicates here already 
generate start_num = date(start, "YMD")
format start_num %td
	drop start
	rename start_num start
	
by speechnr (start), sort: gen start_MP = start[1] 
format start_MP %td

generate end_num = date(end, "YMD")
format end_num %td
	drop end
	rename end_num end
	
by speechnr: egen end_MP = max(end)
format end_MP %td

drop if start ==. 
																				/*Liste, no Dates available, thus deleted*/
																				/*there is a few interesting cases here where parties have been dissolved later and new follower parties were founded. I have decided to go with the earliest start date because the point in the regression will be to see whether the MP experience has helped the MEP to SRWI.*/
generate date_num = date(date, "YMD")
format date_num %td
	drop date 
	rename date_num date

keep if end > date & start < date
drop start end speech 

duplicates tag speechnr, generate(speech)
drop speech

save "$data/7_part3.dta", replace

import delimited "$raw/7_part4.csv", varnames(1) clear
drop v12 v13 v14 
duplicates tag speechnr, generate(speech)
*Change start date due to duplicates here already 
generate start_num = date(start, "YMD")
format start_num %td
	drop start
	rename start_num start
	
by speechnr (start), sort: gen start_MP = start[1] 
format start_MP %td

generate end_num = date(end, "YMD")
format end_num %td
	drop end
	rename end_num end
	
by speechnr: egen end_MP = max(end)
format end_MP %td

drop if start ==.
																				/*Liste, no Dates available, thus deleted*/
																				/*there is a few interesting cases here where parties have been dissolved later and new follower parties were founded. I have decided to go with the earliest start date because the point in the regression will be to see whether the MP experience has helped the MEP to SRWI.*/
generate date_num = date(date, "YMD")
format date_num %td
	drop date 
	rename date_num date

keep if end > date & start < date
drop start end speech 

duplicates tag speechnr, generate(speech)
drop speech

save "$data/7_part4.dta", replace

* ---- Term 7 party data, part 5 ----
import delimited "$raw/7_part5.csv", varnames(1) clear
drop v12 v13 v14
duplicates tag speechnr, generate(speech)

* Membership start: string -> daily date; earliest start per speech goes to start_MP
gen start_td = date(start, "YMD")
drop start
rename start_td start
format start %td
bysort speechnr (start): gen start_MP = start[1]
format start_MP %td

* Membership end: string -> daily date; latest end per speech goes to end_MP
gen end_td = date(end, "YMD")
drop end
rename end_td end
format end %td
by speechnr: egen end_MP = max(end)
format end_MP %td

* Rows without any membership dates cannot be placed in time and are removed.
/*There are a few cases where parties were dissolved later and successor parties were founded. The earliest start date is used because the point in the regression is to see whether the MP experience has helped the MEP to SRWI.*/
drop if missing(start)

* Speech date: string -> daily date
gen speech_td = date(date, "YMD")
drop date
rename speech_td date
format date %td

* Keep the membership spell that strictly contains the speech date
keep if start < date & end > date
drop start end speech

* Duplicate tag for an interactive check only; removed again immediately
duplicates tag speechnr, generate(speech)
drop speech

save "$data/7_part5.dta", replace

* Stack the five Term 7 party files into one dataset.
* File names in -append using- are separated by blanks, as the documented syntax requires
* (the previous version ran the quoted names together).
use "$data/7_part1.dta", clear
append using "$data/7_part2.dta" "$data/7_part3.dta" "$data/7_part4.dta" "$data/7_part5.dta"
save "$data/7_all_party.dta", replace

* Duplicate tag for an interactive check only; it is not saved
duplicates tag speechnr, generate(speech)
drop speech

*Include the remaining speeches (Term 7)
*(1) Open data
/*Data is scraped from TalkofEurope LinkedOpenData: 
https://linkedpolitics.project.cwi.nl/web/html/home.html*/
import delimited "$raw/7_all_com.csv", varnames(1) clear

*(2) Merge party & com data
*Preparing to merge - Committee dataset
drop v12
duplicates tag speechnr, generate(speech)

* Committee start: string -> daily date; earliest start per speech goes to start_FEMM
gen start_td = date(start_com, "YMD")
drop start_com
rename start_td start
format start %td
bysort speechnr (start): gen start_FEMM = start[1]
format start_FEMM %td

* Committee end: string -> daily date; latest end per speech goes to end_FEMM
gen end_td = date(end_com, "YMD")
drop end_com
rename end_td end
format end %td
by speechnr: egen end_FEMM = max(end)
format end_FEMM %td

* Keep only the row(s) carrying the earliest committee start of each speech
drop if start != start_FEMM
drop start end speech

* Duplicate tag for an interactive check only; removed again immediately
duplicates tag speechnr, generate(speech)
drop speech

* Speech date: string -> daily date
gen speech_td = date(date, "YMD")
drop date
rename speech_td date
format date %td

save "$data/7_all_com.dta", replace
clear

*Preparing to merge
use "$data/7_dictionary.dta", clear

* Convert the speech date to a Stata daily date. The file is overwritten in place, so on a
* second run date is already numeric and date() would stop with a type mismatch; the
* conversion is therefore only done while date is still a string.
capture confirm string variable date
if _rc == 0 {
	generate date_num = date(date, "YMD")
	format date_num %td
	drop date
	rename date_num date
}

save "$data/7_dictionary.dta", replace 

use "$data/7_all_party.dta",clear

*(3) Merging all Term 7 data
* Party data + committee data, matched on speech number and speech date
merge 1:1 speechnr date using "$data/7_all_com.dta" 							/*Checking the merge worked*/

* dictionary is dropped here and taken from the dictionary file in the next merge
drop _merge dictionary

merge 1:1 speechnr date using "$data/7_dictionary.dta"
* Duplicate tag for an interactive check only; removed again immediately
duplicates tag speechnr, generate(speech)
drop speech _merge

* Dictionary dummy: missing after the merge is recoded to 0
replace dictionary = 0 if dictionary == .  

save "$data/7_all.dta", replace

********************************************************************************
*(2) Encoding 
*Dictionary: missing values of the topic variables are set to 0
foreach topic of varlist dv ch lm cj wh rep {
	replace `topic' = 0 if `topic' == .
}

*Gender
* encode numbers the string values alphabetically; code 2 is then recoded to 0.
* Assumes gender only holds "female" (-> 1) and "male" (-> 2) - TODO confirm with the tab output.
encode gender, generate(gender_num)
drop gender
rename gender_num female
tab female
tab female, nolab
replace female = 0 if female == 2

label define gender_type 0 "male" 1 "female"
label values female gender_type

*FEMM Committee
* Speeches without a FEMM entry are missing after encode and are set to 0.
* Presumably the only string value is a "yes"-type entry coded 1 - verify with the tab output.
encode femm, generate(femm_num)
drop femm
rename femm_num femm
tab femm
tab femm, nolab
replace femm = 0 if femm == .

label define femm_type 0 "No" 1 "Yes"
label values femm femm_type

*(3) Changing the dates
* Birth date: string -> daily date, stored as birthday
gen birth_td = date(birth, "YMD")
drop birth
rename birth_td birthday
format birthday %td

*(4) Generating new variables 
* Proportion of women in the EP (same value for every speech in this file)
/*Data from: https://op.europa.eu/en/publication-detail/-/publication/1f2a7ac7-d8f7-11e9-9c4e-01aa75ed71a1/language-en*/ 
gen women_ep = 36.1

* Age at time of speech
gen age = age(birthday, date)

* EP membership duration at time of speech, in (fractional) months since start_MP
gen mep_dur = datediff_frac(start_MP, date, "month")

* FEMM membership duration at time of speech, in (fractional) months since start_FEMM
gen femm_dur = datediff_frac(start_FEMM, date, "month")

* A negative duration means the speech predates the FEMM membership:
* such speeches count as non-FEMM and get no duration
replace femm = 0 if femm_dur < 0
replace femm_dur = . if femm_dur < 0

* Speeches dated before start_MP are removed
drop if mep_dur < 0

* Parlterm dummy: every speech in this file gets 1
gen parlterm_7 = 1

label define p7 0 "No" 1 "Yes"
label values parlterm_7 p7

*(5) Label variables 
* Speech and speaker identifiers
label variable speechnr "Number of individual Parliamentary Speech"
label variable name "Name of MEP"
label variable partyname "Name of Party"
label variable parlterm "Parliamentary Term"
label variable parlterm_7 "Seventh Parliamentary Term Dummy"
* NOTE(review): no variable called country is created before this point in the visible code;
* this presumably resolves to countryname through variable abbreviation - confirm.
label variable country "Country"

* Dictionary topics
* NOTE(review): the encoding step recodes cj but dis is labelled here - confirm both names exist.
label variable dv "Domestic Violence Terms"
label variable ch "Childcare Terms"
label variable lm "Labour Market Terms"
label variable dis "Discrimination Terms"
label variable wh "Women's Health Terms"
label variable rep "Representation Terms"
label variable dictionary "Type of Dictionary Terms discussed"

* MEP characteristics
label variable female "Gender of MEP"
label variable femm "Membership Committee on Women's Rights and Equal Opportunities"
label variable age "Age of MP at Time of Speech"
label variable women_ep "Proportion of Women in the European Parliament in %"

* Dates
label variable date "Date"
label variable birthday "Birthday"
label variable start_MP "Start of Membership in Term"
label variable end_MP "End of Membership in Term"
label variable start_FEMM "Start of Membership in FEMM"
label variable end_FEMM "End of Membership in FEMM in Term"

* Durations
label variable femm_dur "Duration of FEMM Membership at Time of Speech"
label variable mep_dur "Duration of EP Membership at Time of Speech"
********************************************************************************
*(6) Output the clean dataset
save "$data/7_all.dta", replace 

********************************************************************************
*Terms 5 to 7 together
********************************************************************************
*(1) Append
* File names in -append using- are separated by blanks, as the documented syntax requires
* (the previous version ran the quoted names together).
use "$data/5_all.dta", clear 
append using "$data/6_all.dta" "$data/7_all.dta"
save "$data/all_sofar.dta", replace 

*Final check for duplicates
* Lists the MEPs whose speech number occurs more than once; the tag itself is not kept
duplicates tag speechnr, generate(speech)
tab name if speech > 0
drop speech 

*Correct the missing party names and start / end dates
* Empty party names get the placeholder "NA" so the affected MEPs can be listed
replace partyname = "NA" if partyname == ""
tab name if partyname == "NA"

*Add "No" for parlterms
* Each term file only sets its own dummy; after appending, the other two are missing
foreach t in 5 6 7 {
	replace parlterm_`t' = 0 if parlterm_`t' == .
}

* NOTE(review): the corrections below pick rows by speech date (and, where needed, FEMM
* dates) only - not by name. They assume no other MEP with a missing party spoke on
* the same day - confirm against the tab output above.

*Donata Gottardi
replace partyname = "Partito Democratico" if date == td(19feb2008) & partyname == "NA"
replace start_MP = td(08may2006) if date == td(19feb2008) & start_MP == .
replace end_MP = td(13jul2009) if date == td(19feb2008) & end_MP == .

*Eva-Britt Svensson
replace partyname = "Vänsterpartiet" if date == td(14jul2009) & end_FEMM == td(31aug2011) & partyname == "NA"
replace start_MP = td(20jul2004) if date == td(14jul2009) & end_FEMM == td(31aug2011) & start_MP == .
replace end_MP = td(31aug2011) if date == td(14jul2009) & end_FEMM == td(31aug2011) & end_MP == .

*Heidi Hautala
replace partyname = "Vihreä liitto" if date == td(20jul1999) & partyname == "NA"
replace start_MP = td(01jan1995) if date == td(20jul1999) & start_MP == .
replace end_MP = td(21jun2011) if date == td(20jul1999) & end_MP == .

*Inês Zuber
replace partyname = "Partido Comunista Português" if date == td(18jan2012) & partyname == "NA"
replace start_MP = td(18jan2012) if date == td(18jan2012) & start_MP == .
replace end_MP = td(30jan2016) if date == td(18jan2012) & end_MP == .

*Siiri Oviir
replace partyname = "Eesti Keskerakond" if date == td(24may2012) & partyname == "NA"
replace start_MP = td(01may2004) if date == td(24may2012) & start_MP == .
replace end_MP = td(30jun2014) if date == td(24may2012) & end_MP == .

*Urszula Krupa (two speech dates)
replace partyname = "Liga Polskich Rodzin" if inlist(date, td(13jun2006), td(14jun2006)) & partyname == "NA"
replace start_MP = td(20jul2004) if inlist(date, td(13jun2006), td(14jun2006)) & start_MP == .
replace end_MP = td(13jul2009) if inlist(date, td(13jun2006), td(14jun2006)) & end_MP == .

*Véronique De Keyser
replace partyname = "Parti socialiste" if date == td(14jul2009) & end_FEMM == td(13jul2009) & partyname == "NA"
replace start_MP = td(25sep2001) if date == td(14jul2009) & end_FEMM == td(13jul2009) & start_MP == .
replace end_MP = td(30jun2014) if date == td(14jul2009) & end_FEMM == td(13jul2009) & end_MP == .

*Cristiana Muscardini (two speech dates)
replace partyname = "Alleanza nazionale" if inlist(date, td(20jul2004), td(23oct2012)) & partyname == "NA"
replace start_MP = td(25jul1989) if inlist(date, td(20jul2004), td(23oct2012)) & start_MP == .
replace end_MP = td(30jun2014) if inlist(date, td(20jul2004), td(23oct2012)) & end_MP == .

*Angelika Werthmann (identified by her FEMM membership dates)
replace partyname = " - " if start_FEMM == td(01mar2011) & end_FEMM == td(7apr2014) & partyname == "NA"
replace start_MP = td(14jul2010) if start_FEMM == td(01mar2011) & end_FEMM == td(7apr2014) & start_MP == .
replace end_MP = td(30jun2014) if start_FEMM == td(01mar2011) & end_FEMM == td(7apr2014) & end_MP == .

*(2) Encode Countries 
* The code <-> country mapping is defined first and handed to encode, so every country
* always receives the code listed here. Previously encode numbered the countries that
* happen to be in the data alphabetically and these labels were pasted on top afterwards,
* which silently mislabels every later country as soon as one country is absent or spelled
* differently. The regional dummies and the PPG section rely on these fixed codes.
label define ///
	country_type ///
	1 "Austria" ///
	2 "Belgium" ///
	3 "Bulgaria" ///
	4 "Croatia" ///
	5 "Cyprus" ///
	6 "Czech Republic" ///
	7 "Denmark" ///
	8 "Estonia" ///
	9 "Finland" ///
	10 "France" ///
	11 "Germany" ///
	12 "Greece" ///
	13 "Hungary" ///
	14 "Ireland" ///
	15 "Italy" ///
	16 "Latvia" ///
	17 "Lithuania" ///
	18 "Luxembourg" ///
	19 "Malta" ///
	20 "Netherlands" ///
	21 "Poland" ///
	22 "Portugal" ///
	23 "Romania" ///
	24 "Slovakia" ///
	25 "Slovenia" ///
	26 "Spain" ///
	27 "Sweden" ///
	28 "United Kingdom", replace

* noextend: stop with an error if countryname holds a spelling that is not in the list above,
* instead of inventing a code 29+ that no regional dummy would cover
encode countryname, gen(country) label(country_type) noextend
	drop countryname
	tab country
	tab country, nolab

*(3) Regional Dummies 
/* regions according to EuroVoc: https://en.wikipedia.org/wiki/EuroVoc*/
* Each dummy is 1 when the country code is in the listed set and 0 otherwise
* (a missing country therefore yields 0 in all four dummies).
* Every dummy is tabulated once before and once after its value label is attached.

*Northern (codes 7 8 9 16 17 27)
gen north = inlist(country, 7, 8, 9, 16, 17, 27)
tab country north
label variable north "Northern EU Countries"
label define nor 0 "No" 1 "Yes"
label values north nor
tab country north

*Central and Eastern (codes 3 4 6 13 21 23 24 25)
gen ceneast = inlist(country, 3, 4, 6, 13, 21, 23, 24, 25)
tab country ceneast
label variable ceneast "Central and Eastern EU Countries"
label define ce 0 "No" 1 "Yes"
label values ceneast ce
tab country ceneast

*Southern (codes 5 12 15 19 22 26)
gen south = inlist(country, 5, 12, 15, 19, 22, 26)
tab country south
label variable south "Southern EU Countries"
label define sou 0 "No" 1 "Yes"
label values south sou
tab country south

*Western (codes 1 2 10 11 14 18 20 28)
gen west = inlist(country, 1, 2, 10, 11, 14, 18, 20, 28)
tab country west
label variable west "Western EU Countries"
label define wes 0 "No" 1 "Yes"
label values west wes
tab country west

save "$data/all_sofar.dta", replace

*(4) Parliamentary Party Groups (PPGs)
/*Information obtained from: https://www.europarl.europa.eu/election-results-2019/en/breakdown-national-parties-political-group/1999-2004/constitutive-session/ */

* Every speech starts with the placeholder "NA"; the country sections that follow
* overwrite it with the party group of the speaker's national party
generate PPG = "NA"
label variable PPG "Parliamentary Party Groups of the EP"

* List every distinct party name (up to 1000) as a reference for the rules that follow
codebook partyname, tabulate(1000)

***Austria*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 1, so generic names ("parteilos", " - ")
*    cannot overwrite or be overwritten by other countries' rows.
*  - Term 6 starts on 20jul2004 (Term 5 ends on 19jul2004); the old lower bound of 21jul2004
*    left speeches held on 20jul2004 without any term.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

*Term 5
tab partyname if country == 1 & date < td(20jul2004)

replace PPG = "EPP-ED" if country == 1 & date < td(20jul2004) ///
	& strpos(partyname, "Österreichische Volkspartei")

replace PPG = "PSE" if country == 1 & date < td(20jul2004) ///
	& strpos(partyname, "Sozialdemokratische Partei Österreichs")

replace PPG = "Verts/ALE" if country == 1 & date < td(20jul2004) ///
	& strpos(partyname, "Die Grünen - Die Grüne Alternative")

replace PPG = "NI - non-attached members" if country == 1 & date < td(20jul2004) & ( ///
	strpos(partyname, "Freiheitliche Partei Österreichs") | ///
	strpos(partyname, "parteilos") )

*Term 6
tab partyname if country == 1 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 1 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Österreichische Volkspartei - Liste Ursula Stenzel")

replace PPG = "PSE" if country == 1 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Sozialdemokratische Partei Österreichs")

replace PPG = "ALDE" if country == 1 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Die Liberalen")

replace PPG = "Verts/ALE" if country == 1 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Die Grünen - Die Grüne Alternative")

replace PPG = "NI - non-attached members" if country == 1 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Freiheitliche Partei Österreichs") | ///
	strpos(partyname, "Liste Dr. Hans-Peter Martin - Für echte Kontrolle in Brüssel") )

*Term 7
tab partyname if country == 1 & date >= td(14jul2009)

* The non-attached rule runs first: its " - " pattern also occurs inside longer party names
* (e.g. "Die Grünen - Die Grüne Alternative"), so the specific rules below must be able to
* overwrite it.
replace PPG = "NI - non-attached members" if country == 1 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Freiheitliche Partei Österreichs") | ///
	strpos(partyname, " - ") | ///
	strpos(partyname, "Bündnis Zukunft Österreich") )

replace PPG = "EPP" if country == 1 & date >= td(14jul2009) ///
	& strpos(partyname, "Österreichische Volkspartei")

replace PPG = "S&D" if country == 1 & date >= td(14jul2009) ///
	& strpos(partyname, "Sozialdemokratische Partei Österreichs")

replace PPG = "Greens/EFA" if country == 1 & date >= td(14jul2009) ///
	& strpos(partyname, "Die Grünen - Die Grüne Alternative")

br if PPG == "NA" & country == 1


***Belgium*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 2, so names shared with other countries
*    ("Parti socialiste", "Indépendant") only affect Belgian rows.
*  - Term 6 starts on 20jul2004 (Term 5 ends on 19jul2004); the old lower bound of 21jul2004
*    left speeches held on 20jul2004 without any term.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

* Term 5 rows with party "Ecolo (Belgium)" are assigned to Belgium
replace country = 2 if strpos(partyname, "Ecolo (Belgium)") & date < td(20jul2004)

*Term 5
tab partyname if country == 2 & date < td(20jul2004)

replace PPG = "EPP-ED" if country == 2 & date < td(20jul2004) & ( ///
	strpos(partyname, "Christen-Democratisch & Vlaams") | ///
	strpos(partyname, "Centre Démocrate Humaniste") | ///
	strpos(partyname, "Mouvement des Citoyens pour le Changement") | ///
	strpos(partyname, "Christliche Soziale Partei: Europäische Volkspartei") | ///
	strpos(partyname, "Parti social-chrétien") | ///
	strpos(partyname, "Christelijke Volkspartij") )

replace PPG = "PSE" if country == 2 & date < td(20jul2004) & ( ///
	strpos(partyname, "Parti socialiste") | ///
	strpos(partyname, "Socialistische Partij") | ///
	strpos(partyname, "Sociaal Progressief Alternatief") )

replace PPG = "ELDR" if country == 2 & date < td(20jul2004) & ( ///
	strpos(partyname, "Vlaamse liberalen en democraten") | ///
	strpos(partyname, "Parti réformateur libéral/Front démocratique des francophones") )

replace PPG = "Verts/ALE" if country == 2 & date < td(20jul2004) & ( ///
	strpos(partyname, "Ecolo") | ///
	strpos(partyname, "Indépendant") | ///
	strpos(partyname, "Groen!") | ///
	strpos(partyname, "België Spirit") | ///
	strpos(partyname, "Volksunie") | ///
	strpos(partyname, "Anders gaan arbeiden, leven en vrijen") | ///
	strpos(partyname, "Ecolo (Belgium)") )

replace PPG = "NI - non-attached members" if country == 2 & date < td(20jul2004) ///
	& strpos(partyname, "Vlaams Blok")

*Term 6
tab partyname if country == 2 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 2 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Christen-Democratisch & Vlaams - Nieuw-Vlaamse Alliantie") | ///
	strpos(partyname, "Centre Démocrate Humaniste") | ///
	strpos(partyname, "Christlich Soziale Partei") )

replace PPG = "PSE" if country == 2 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Parti Socialiste") | ///
	strpos(partyname, "Socialistische Partij.Anders- Sociaal, Progressief, Internationaal, Regionaal, Integraal Democratisch, Toekomstgericht") )

replace PPG = "ALDE" if country == 2 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Vlaamse Liberalen en Democraten - Vivant") | ///
	strpos(partyname, "Mouvement Réformateur") | ///
	strpos(partyname, "Open VLD") )

replace PPG = "Verts/ALE" if country == 2 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Ecologistes Confédérés pour l'Organisation de Luttes Originales") | ///
	strpos(partyname, "Groen") )

replace PPG = "NI - non-attached members" if country == 2 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Vlaams Blok") | ///
	strpos(partyname, "Vlaams Belang") )

*Term 7
tab partyname if country == 2 & date >= td(14jul2009)

replace PPG = "EPP" if country == 2 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Christen-Democratisch & Vlaams") | ///
	strpos(partyname, "Centre Démocrate Humaniste") | ///
	strpos(partyname, "Christlich Soziale Partei") )

replace PPG = "S&D" if country == 2 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Parti Socialiste") | ///
	strpos(partyname, "Socialistische Partij.Anders") | ///
	strpos(partyname, "Parti socialiste") )

replace PPG = "ALDE" if country == 2 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Open Vlaamse Liberalen en Democraten") | ///
	strpos(partyname, "Mouvement Réformateur") )

replace PPG = "Greens/EFA" if country == 2 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Groen") | ///
	strpos(partyname, "Ecologistes Confédérés pour l'Organisation de Luttes Originales") )

* Runs after the EPP rule, so a name containing "Nieuw-Vlaamse Alliantie" ends up as ECR
replace PPG = "ECR" if country == 2 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Lijst Dedecker") | ///
	strpos(partyname, "Nieuw-Vlaamse Alliantie") )

replace PPG = "EFD" if country == 2 & date >= td(14jul2009) ///
	& strpos(partyname, "Onafhankelijk")

replace PPG = "NI - non-attached members" if country == 2 & date >= td(14jul2009) ///
	& strpos(partyname, "Vlaams Belang")

br if PPG == "NA" & country == 2


***Bulgaria*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 3, so generic patterns (" - ", "Attack")
*    only affect Bulgarian rows.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

*Term 6
tab partyname if country == 3 & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 3 & date < td(14jul2009) & ( ///
	strpos(partyname, "BANU-PU") | ///
	strpos(partyname, "Citizens for European Development of Bulgaria") | ///
	strpos(partyname, "United Democratic Forces") )

replace PPG = "PSE" if country == 3 & date < td(14jul2009) & ( ///
	strpos(partyname, "Coalition for Bulgaria") | ///
	strpos(partyname, "Platform European Socialists") )

replace PPG = "ALDE" if country == 3 & date < td(14jul2009) & ( ///
	strpos(partyname, "Movement for Rights and Freedoms") | ///
	strpos(partyname, "National Movement Simeon II") )

replace PPG = "NI - non-attached members" if country == 3 & date < td(14jul2009) & ( ///
	strpos(partyname, "Attack") | ///
	strpos(partyname, "Attack Coalition") )

*Term 7
tab partyname if country == 3 & date >= td(14jul2009)

replace PPG = "EPP" if country == 3 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Citizens for European Development of Bulgaria") | ///
	strpos(partyname, "Blue Coalition") )

replace PPG = "S&D" if country == 3 & date >= td(14jul2009) ///
	& strpos(partyname, "Coalition for Bulgaria")

replace PPG = "ALDE" if country == 3 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Movement for Rights and Freedoms") | ///
	strpos(partyname, "National Movement for Stability and Progress") )

replace PPG = "EFD" if country == 3 & date >= td(14jul2009) ///
	& strpos(partyname, "People for Real, Open and United Democracy / Conservative Party for Democracy and Success")

replace PPG = "NI - non-attached members" if country == 3 & date >= td(14jul2009) & ( ///
	strpos(partyname, " - ") | ///
	strpos(partyname, "National-Democratic Party") | ///
	strpos(partyname, "Attack") )

br if PPG == "NA" & country == 3


***Croatia*** 
*Term 7
* Party names of the Croatian speeches in Term 7, for reference
tab partyname if country == 4 & date >= td(14jul2009)

* Single rule: Term 7 speeches of the HDZ are assigned to the EPP group
replace PPG = "EPP" if date >= td(14jul2009) ///
	& strpos(partyname, "Hrvatska demokratska zajednica")

* Croatian rows that are still unassigned
br if PPG == "NA" & country == 4


***Cyprus*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 5, so generic English names such as
*    "Democratic Party" no longer match parties of other countries.
*  - Term 6 starts on 20jul2004 (Term 5 ends on 19jul2004); the old lower bound of 21jul2004
*    left speeches held on 20jul2004 without any term.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

*Term 5
tab partyname if country == 5 & date < td(20jul2004)

replace PPG = "EPP-ED" if country == 5 & date < td(20jul2004) ///
	& strpos(partyname, "Dimocraticos Synagermos")

replace PPG = "PSE" if country == 5 & date < td(20jul2004) ///
	& strpos(partyname, "Kinima Sosialdimokraton")

replace PPG = "GUE/NGL" if country == 5 & date < td(20jul2004) ///
	& strpos(partyname, "Anorthotiko Komma Ergazomenou Laou - Aristera - Nees Dinameis")

*Term 6
tab partyname if country == 5 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 5 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Dimokratikos Synagermos") | ///
	strpos(partyname, "Gia tin Evropi") )

replace PPG = "GUE/NGL" if country == 5 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Anorthotiko Komma Ergazomenou Laou - Aristera - Nees Dynameis")

*Term 7
tab partyname if country == 5 & date >= td(14jul2009)

replace PPG = "EPP" if country == 5 & date >= td(14jul2009) ///
	& strpos(partyname, "Democratic Rally")

replace PPG = "S&D" if country == 5 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Movement for Social Democracy EDEK") | ///
	strpos(partyname, "Democratic Party") )

replace PPG = "GUE/NGL" if country == 5 & date >= td(14jul2009) ///
	& strpos(partyname, "Progressive Party of Working People - Left - New Forces")

br if PPG == "NA" & country == 5


***Czech Republic*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 6, so generic names ("no party", " - ")
*    only affect Czech rows.
*  - Term 6 starts on 20jul2004 (Term 5 ends on 19jul2004); the old lower bound of 21jul2004
*    left speeches held on 20jul2004 without any term.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

*Term 5
tab partyname if country == 6 & date < td(20jul2004)

replace PPG = "EPP-ED" if country == 6 & date < td(20jul2004) & ( ///
	strpos(partyname, "Křesťanská a demokratická unie - Československá strana lidová") | ///
	strpos(partyname, "Občanská demokratická strana") | ///
	strpos(partyname, "no party") )

replace PPG = "PSE" if country == 6 & date < td(20jul2004) ///
	& strpos(partyname, "Česká strana sociálně demokratická")

*Term 6
tab partyname if country == 6 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 6 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Občanská demokratická strana") | ///
	strpos(partyname, "SNK sdruženi nezávislých a Evropští demokraté") | ///
	strpos(partyname, "Křesťanská a demokratická unie - Československá strana lidová") | ///
	strpos(partyname, "Evropští demokraté") )

replace PPG = "PSE" if country == 6 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Česká strana sociálně demokratická")

replace PPG = "GUE/NGL" if country == 6 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Komunistická strana Čech a Moravy")

replace PPG = "IND/DEM" if country == 6 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "NEZÁVISLÍ") | ///
	strpos(partyname, "NEZÁVISLÍ/DEMOKRATÉ") )

*Term 7
tab partyname if country == 6 & date >= td(14jul2009)

replace PPG = "EPP" if country == 6 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Křesťanská a demokratická unie - Československá strana lidová") | ///
	strpos(partyname, " - ") )

replace PPG = "S&D" if country == 6 & date >= td(14jul2009) ///
	& strpos(partyname, "Česká strana sociálně demokratická")

replace PPG = "ECR" if country == 6 & date >= td(14jul2009) ///
	& strpos(partyname, "Občanská demokratická strana")

replace PPG = "GUE/NGL" if country == 6 & date >= td(14jul2009) ///
	& strpos(partyname, "Komunistická strana Čech a Moravy")

br if PPG == "NA" & country == 6


***Denmark*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 7, so generic names ("no party", " - ")
*    only affect Danish rows.
*  - The Term 5 PSE rule no longer lists "Občanská demokratická strana": that is a Czech
*    party copied over from the Czech section, and the rule used to set it to PSE for all dates.
*  - Term 6 starts on 20jul2004 (Term 5 ends on 19jul2004); the old lower bound of 21jul2004
*    left speeches held on 20jul2004 without any term.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

*Term 5
tab partyname if country == 7 & date < td(20jul2004)

replace PPG = "EPP-ED" if country == 7 & date < td(20jul2004) ///
	& strpos(partyname, "Det Konservative Folkeparti")

replace PPG = "PSE" if country == 7 & date < td(20jul2004) & ( ///
	strpos(partyname, "Socialdemokratiet") | ///
	strpos(partyname, "no party") )

replace PPG = "ELDR" if country == 7 & date < td(20jul2004) & ( ///
	strpos(partyname, "Venstre") | ///
	strpos(partyname, "Det Radikale Venstre") )

replace PPG = "GUE/NGL" if country == 7 & date < td(20jul2004) & ( ///
	strpos(partyname, "Socialistisk Folkeparti") | ///
	strpos(partyname, "Den frie Socialdemokrat") | ///
	strpos(partyname, "Folkebevægelsen mod EU") )

replace PPG = "UEN" if country == 7 & date < td(20jul2004) ///
	& strpos(partyname, "Dansk Folkeparti")

replace PPG = "EDD" if country == 7 & date < td(20jul2004) ///
	& strpos(partyname, "JuniBevægelsen")

*Term 6
tab partyname if country == 7 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 7 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Det Konservative Folkeparti") | ///
	strpos(partyname, "Ny Alliance") )

replace PPG = "PSE" if country == 7 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Socialdemokratiet")

replace PPG = "ALDE" if country == 7 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Det Radikale Venstre") | ///
	strpos(partyname, "Venstre, Danmarks Liberale Parti") )

replace PPG = "UEN" if country == 7 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Dansk Folkeparti")

replace PPG = "Verts/ALE" if country == 7 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Socialistisk Folkeparti")

replace PPG = "GUE/NGL" if country == 7 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Folkebevægelsen mod EU")

replace PPG = "IND/DEM" if country == 7 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "JuniBevægelsen - Mod Unionen")

*Term 7
tab partyname if country == 7 & date >= td(14jul2009)

replace PPG = "EPP" if country == 7 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Det Konservative Folkeparti") | ///
	strpos(partyname, " - ") )

replace PPG = "S&D" if country == 7 & date >= td(14jul2009) ///
	& strpos(partyname, "Socialdemokratiet")

replace PPG = "ALDE" if country == 7 & date >= td(14jul2009) ///
	& strpos(partyname, "Venstre, Danmarks Liberale Parti")

replace PPG = "Greens/EFA" if country == 7 & date >= td(14jul2009) ///
	& strpos(partyname, "Socialistisk Folkeparti")

replace PPG = "ECR" if country == 7 & date >= td(14jul2009) ///
	& strpos(partyname, "Løsgænger")

replace PPG = "GUE/NGL" if country == 7 & date >= td(14jul2009) ///
	& strpos(partyname, "Folkebevægelsen mod EU")

replace PPG = "EFD" if country == 7 & date >= td(14jul2009) ///
	& strpos(partyname, "Dansk Folkeparti")
 
br if PPG == "NA" & country == 7


***Estonia*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 8.
*  - Term 6 starts on 20jul2004 (Term 5 ends on 19jul2004); the old lower bound of 21jul2004
*    left speeches held on 20jul2004 without any term.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

*Term 5
tab partyname if country == 8 & date < td(20jul2004)

replace PPG = "EPP-ED" if country == 8 & date < td(20jul2004) & ( ///
	strpos(partyname, "Isamaaliit") | ///
	strpos(partyname, "Res Publica") )

replace PPG = "PSE" if country == 8 & date < td(20jul2004) ///
	& strpos(partyname, "Sotsiaaldemokraatlik Erakond")

replace PPG = "ELDR" if country == 8 & date < td(20jul2004) ///
	& strpos(partyname, "Eesti Reformierakond")

replace PPG = "UEN" if country == 8 & date < td(20jul2004) ///
	& strpos(partyname, "Eestimaa Rahvalliit")

*Term 6
tab partyname if country == 8 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 8 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Erakond Isamaaliit (Pro Patria Union)")

replace PPG = "PSE" if country == 8 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Sotsiaaldemokraatlik Erakond")

replace PPG = "ALDE" if country == 8 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Eesti Keskerakond") | ///
	strpos(partyname, "Eesti Reformierakond") )

*Term 7
tab partyname if country == 8 & date >= td(14jul2009)

replace PPG = "EPP" if country == 8 & date >= td(14jul2009) ///
	& strpos(partyname, "Erakond Isamaa ja Res Publica Liit")

replace PPG = "S&D" if country == 8 & date >= td(14jul2009) ///
	& strpos(partyname, "Sotsiaaldemokraatlik Erakond")

replace PPG = "ALDE" if country == 8 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Eesti Reformierakond") | ///
	strpos(partyname, "Eesti Keskerakond") )

replace PPG = "Greens/EFA" if country == 8 & date >= td(14jul2009) ///
	& strpos(partyname, "Sõltumatu")
                
br if PPG == "NA" & country == 8


***Finland*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 9, so the generic " - " pattern
*    only affects Finnish rows.
*  - Term 6 starts on 20jul2004 (Term 5 ends on 19jul2004); the old lower bound of 21jul2004
*    left speeches held on 20jul2004 without any term.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

* Rows with party "Kansallinen Kokoomus (Finland)" are assigned to Finland
replace country = 9 if strpos(partyname, "Kansallinen Kokoomus (Finland)") 

*Term 5
tab partyname if country == 9 & date < td(20jul2004)

replace PPG = "EPP-ED" if country == 9 & date < td(20jul2004) & ( ///
	strpos(partyname, "Kansallinen Kokoomus") | ///
	strpos(partyname, "Kansallinen Kokoomus (Finland)") | ///
	strpos(partyname, "Suomen kristillisdemokraatit rp") | ///
	strpos(partyname, "Suomen Kristillinen Liitto") )

replace PPG = "PSE" if country == 9 & date < td(20jul2004) ///
	& strpos(partyname, "Suomen Sosialidemokraattinen Puolue/Finlands Socialdemokratiska")

replace PPG = "ELDR" if country == 9 & date < td(20jul2004) & ( ///
	strpos(partyname, "Suomen Keskusta") | ///
	strpos(partyname, "Svenska folkpartiet") )

replace PPG = "GUE/NGL" if country == 9 & date < td(20jul2004) ///
	& strpos(partyname, "Vasemmistoliitto")

replace PPG = "Verts/ALE" if country == 9 & date < td(20jul2004) & ( ///
	strpos(partyname, "Vihreät") | ///
	strpos(partyname, "Vihreä liitto") | ///
	strpos(partyname, " - ") )
      
*Term 6
tab partyname if country == 9 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 9 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Kansallinen Kokoomus")

replace PPG = "PSE" if country == 9 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Suomen Sosialidemokraattinen Puolue/Finlands Socialdemokratiska")

replace PPG = "ALDE" if country == 9 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Suomen Keskusta") | ///
	strpos(partyname, "Svenska folkpartiet") )

replace PPG = "Verts/ALE" if country == 9 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Vihreä liitto")

replace PPG = "GUE/NGL" if country == 9 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Vasemmistoliitto")

*Term 7
tab partyname if country == 9 & date >= td(14jul2009)

replace PPG = "EPP" if country == 9 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Kansallinen Kokoomus") | ///
	strpos(partyname, "Suomen kristillisdemokraatit") )

replace PPG = "S&D" if country == 9 & date >= td(14jul2009) ///
	& strpos(partyname, "Suomen Sosialidemokraattinen Puolue/Finlands Socialdemokratiska")

replace PPG = "ALDE" if country == 9 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Suomen Keskusta") | ///
	strpos(partyname, "Svenska folkpartiet") )

replace PPG = "Greens/EFA" if country == 9 & date >= td(14jul2009) ///
	& strpos(partyname, "Vihreä liitto")

replace PPG = "EFD" if country == 9 & date >= td(14jul2009) ///
	& strpos(partyname, "Perussuomalaiset")

br if PPG == "NA" & country == 9


***France*** 
* Fixes relative to the earlier version of this section:
*  - & binds tighter than | in Stata, so "A | B & date..." applied the date window to B only;
*    the party alternatives are now parenthesised.
*  - Every rule is restricted to country == 10, so names shared with other countries
*    ("Parti socialiste", "Indépendant") only affect French rows.
*  - Term 6 starts on 20jul2004 (Term 5 ends on 19jul2004); the old lower bound of 21jul2004
*    left speeches held on 20jul2004 without any term.
* Rules run in the original order: where a name matches two rules of a term, the later wins.
* Rows that match no rule keep PPG == "NA" and show up in the browse at the end.

* Rows recorded under the German Greens are renamed to "Les Verts"
replace partyname = "Les Verts" if strpos(partyname, "Bündnis 90/Die Grünen (Germany)") 
*Term 5
tab partyname if country == 10 & date < td(20jul2004)

replace PPG = "EPP-ED" if country == 10 & date < td(20jul2004) & ( ///
	strpos(partyname, "Union pour un Mouvement Populaire") | ///
	strpos(partyname, "Union pour la démocratie française") | ///
	strpos(partyname, "Rassemblement pour la République") | ///
	strpos(partyname, "Démocratie libérale") )

replace PPG = "PSE" if country == 10 & date < td(20jul2004) & ( ///
	strpos(partyname, "Parti socialiste") | ///
	strpos(partyname, "Parti radical de gauche") )

replace PPG = "ELDR" if country == 10 & date < td(20jul2004) ///
	& strpos(partyname, "Union pour la démocratie française - Parti radical")

replace PPG = "GUE/NGL" if country == 10 & date < td(20jul2004) & ( ///
	strpos(partyname, "Parti communiste français") | ///
	strpos(partyname, "Lutte ouvrière") | ///
	strpos(partyname, "Ligue Communiste Révolutionnaire") | ///
	strpos(partyname, "Mouvement des citoyens") | ///
	strpos(partyname, "Socialiste Indépendant") | ///
	strpos(partyname, "Pôle Républicain") | ///
	strpos(partyname, "Indépendant") )

replace PPG = "Verts/ALE" if country == 10 & date < td(20jul2004) ///
	& strpos(partyname, "Les Verts-Europe-Ecologie")

replace PPG = "UEN" if country == 10 & date < td(20jul2004) ///
	& strpos(partyname, "Rassemblement pour la France")

replace PPG = "EDD" if country == 10 & date < td(20jul2004) ///
	& strpos(partyname, "Chasse, Pêche, Nature, Traditions")

replace PPG = "NI - non-attached members" if country == 10 & date < td(20jul2004) & ( ///
	strpos(partyname, "Front national") | ///
	strpos(partyname, "Mouvement pour la France") | ///
	strpos(partyname, "sans étiquette") | ///
	strpos(partyname, "Mouvement des citoyens") )

*Term 6 
tab partyname if country == 10 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if country == 10 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Union pour un Mouvement Populaire") | ///
	strpos(partyname, "Nouveau Centre") | ///
	strpos(partyname, "Union pour la démocratie française") )

replace PPG = "PSE" if country == 10 & date >= td(20jul2004) & date < td(14jul2009) ///
	& strpos(partyname, "Parti socialiste")

replace PPG = "ALDE" if country == 10 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Mouvement Démocrate") | ///
	strpos(partyname, "Avenir Démocrate") | ///
	strpos(partyname, "Alliance Citoyenne pour la Démocratie en Europe") )

replace PPG = "Verts/ALE" if country == 10 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Les Verts-Europe-Ecologie") | ///
	strpos(partyname, "Les Verts") )

replace PPG = "GUE/NGL" if country == 10 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Parti communiste français") | ///
	strpos(partyname, "Rassemblement Démocratique de la Martinique") )

replace PPG = "IND/DEM" if country == 10 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Mouvement pour la France") | ///
	strpos(partyname, "Mouvement pour la France - Rassemblement pour l'Indépendance et la Souveraineté de la France") )

replace PPG = "NI - non-attached members" if country == 10 & date >= td(20jul2004) & date < td(14jul2009) & ( ///
	strpos(partyname, "Front national") | ///
	strpos(partyname, "Parti de la France") | ///
	strpos(partyname, "La Maison de la Vie") )

*Term 7
tab partyname if country == 10 & date >= td(14jul2009)

replace PPG = "EPP" if country == 10 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Union pour un Mouvement Populaire") | ///
	strpos(partyname, "Union des Démocrates et Indépendants") | ///
	strpos(partyname, "Parti Radical") | ///
	strpos(partyname, "Nouveau Centre") | ///
	strpos(partyname, "Parti Radical / Union des Démocrates et Indépendants") | ///
	strpos(partyname, "La Gauche moderne") | ///
	strpos(partyname, "Union pour un Mouvement Populaire - Parti Radical") )

replace PPG = "S&D" if country == 10 & date >= td(14jul2009) ///
	& strpos(partyname, "Parti socialiste")

replace PPG = "ALDE" if country == 10 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Mouvement Démocrate") | ///
	strpos(partyname, "Citoyenneté Action Participation pour le 21ème siècle") )

replace PPG = "Greens/EFA" if country == 10 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Europe Écologie") | ///
	strpos(partyname, "Partitu di a Nazione Corsa") )

replace PPG = "GUE/NGL" if country == 10 & date >= td(14jul2009) & ( ///
	strpos(partyname, "Front de gauche") | ///
	strpos(partyname, "Parti communiste réunionnais") | ///
	strpos(partyname, "Alliance des Outre-Mers") )
 
replace PPG = "EFD" if country == 10 & date >= td(14jul2009) ///
	& strpos(partyname, "Mouvement pour la France")

replace PPG = "NI - non-attached members" if country == 10 & date >= td(14jul2009) ///
	& strpos(partyname, "Front national")

br if PPG == "NA" & country == 10

***Germany*** 
*Assign EP political groups (PPG) to German parties (country == 11) by term.
*Stata evaluates & before |, so the party alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates
*and, e.g., the term-7 label would overwrite the first-listed party in all terms.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 11 & date < td(20jul2004)

replace PPG = "EPP-ED" if (strpos(partyname, "Christlich Demokratische Union Deutschlands") ///
    | strpos(partyname, "Christlich-Soziale Union in Bayern e.V.")) ///
    & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Sozialdemokratische Partei Deutschlands") & date < td(20jul2004)

replace PPG = "GUE/NGL" if (strpos(partyname, "Partei des Demokratischen Sozialismus") ///
    | strpos(partyname, "parteilos")) ///
    & date < td(20jul2004)

*NOTE(review): lower-case "die" here, "Die" in terms 6 and 7 - strpos() is case-sensitive
replace PPG = "Verts/ALE" if strpos(partyname, "Bündnis 90/die Grünen") & date < td(20jul2004)

*Term 6
tab partyname if country == 11 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Christlich Demokratische Union Deutschlands") ///
    | strpos(partyname, "Christlich-Soziale Union in Bayern e.V.")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Sozialdemokratische Partei Deutschlands") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Freie Demokratische Partei - Die Liberalen") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "Verts/ALE" if strpos(partyname, "Bündnis 90/Die Grünen") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "GUE/NGL" if (strpos(partyname, "Partei des Demokratischen Sozialismus") ///
    | strpos(partyname, "Linkspartei.PDS") ///
    | strpos(partyname, "DIE LINKE.")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 11 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Christlich Demokratische Union Deutschlands") ///
    | strpos(partyname, "Christlich-Soziale Union in Bayern e.V.")) ///
    & date >= td(14jul2009)

replace PPG = "S&D" if strpos(partyname, "Sozialdemokratische Partei Deutschlands") & date >= td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Freie Demokratische Partei") & date >= td(14jul2009)

replace PPG = "Greens/EFA" if strpos(partyname, "Bündnis 90/Die Grünen") & date >= td(14jul2009)

replace PPG = "GUE/NGL" if strpos(partyname, "DIE LINKE.") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 11


***Greece*** 
*Assign EP political groups (PPG) to Greek parties (country == 12) by term.
*Stata evaluates & before |, so the party alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 12 & date < td(20jul2004)

replace PPG = "EPP-ED" if strpos(partyname, "Nea Dimokratia") & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Panellinio Socialistiko Kinima") & date < td(20jul2004)

replace PPG = "GUE/NGL" if (strpos(partyname, "Kommounistiko Komma Elladas") ///
    | strpos(partyname, "Synaspismos tis Aristeras kai tis Proodou") ///
    | strpos(partyname, "Dimokratiko Kinoniko Kinima")) ///
    & date < td(20jul2004)

*Term 6
tab partyname if country == 12 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if strpos(partyname, "Nea Dimokratia") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Panellinio Socialistiko Kinima") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "GUE/NGL" if (strpos(partyname, "Kommounistiko Komma Elladas") ///
    | strpos(partyname, "Synaspismos tis Aristeras ton Kinimaton kai tis Oikologias")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "IND/DEM" if strpos(partyname, "Laikos Orthodoxos Synagermos - G. Karatzaferis") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 12 & date >= td(14jul2009)

*NOTE(review): " - " is a substring match and there is no country filter, so it
*also hits any term-7 party name that merely contains " - "; later statements
*may overwrite those rows.
replace PPG = "EPP" if (strpos(partyname, "Nea Demokratia") ///
    | strpos(partyname, " - ")) ///
    & date >= td(14jul2009)

replace PPG = "S&D" if strpos(partyname, "Panhellenic Socialist Movement") & date >= td(14jul2009)

replace PPG = "Greens/EFA" if strpos(partyname, "Ecologist Greens") & date >= td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Democratic Alliance") & date >= td(14jul2009)

replace PPG = "GUE/NGL" if (strpos(partyname, "Communist Party of Greece") ///
    | strpos(partyname, "Coalition of the Radical Left")) ///
    & date >= td(14jul2009)

replace PPG = "EFD" if strpos(partyname, "Popular Orthodox Rally - G. Karatzaferis") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 12


***Hungary*** 
*Assign EP political groups (PPG) to Hungarian parties (country == 13) by term,
*and move two Romania-labelled party names to country code 23.
*Stata evaluates & before |, so the party alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 13 & date < td(20jul2004)

replace PPG = "EPP-ED" if (strpos(partyname, "Magyar Demokrata Fórum") ///
    | strpos(partyname, "Fidesz-Magyar Polgári Szövetség")) ///
    & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Magyar Szocialista Párt") & date < td(20jul2004)

replace PPG = "ELDR" if strpos(partyname, "Szabad Demokraták Szövetsége") & date < td(20jul2004)

*Term 6
tab partyname if country == 13 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Fidesz-Magyar Polgári Szövetség") ///
    | strpos(partyname, "Magyar Demokrata Fórum")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Magyar Szocialista Párt") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Szabad Demokraták Szövetsége") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Recode the country of rows whose party name is tagged "(Romania)" to 23
replace country = 23 if strpos(partyname, "Independent (Romania)") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 13 & date >= td(14jul2009)

replace PPG = "EPP" if strpos(partyname, "Fidesz-Magyar Polgári Szövetség-Keresztény Demokrata Néppárt") & date >= td(14jul2009)

replace PPG = "S&D" if strpos(partyname, "Magyar Szocialista Párt") & date >= td(14jul2009)

replace PPG = "ECR" if strpos(partyname, "Magyar Demokrata Fórum") & date >= td(14jul2009)

replace PPG = "NI - non-attached members" if strpos(partyname, "Jobbik Magyarországért Mozgalom") & date >= td(14jul2009)

*Recode the country of rows whose party name is tagged "(Romania)" to 23
replace country = 23 if strpos(partyname, "Uniunea Democrată Maghiară din România (Romania)") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 13


***Ireland*** 
*Assign EP political groups (PPG) to Irish parties (country == 14) by term.
*Each rule combines a term date window with a substring match on partyname.
*NOTE(review): the rules carry no country filter, so generic patterns such as
*"Independent" or " - " also match rows of other countries.

*Term 5
tab partyname if country == 14 & date < td(20jul2004)

replace PPG = "EPP-ED" if date < td(20jul2004) & strpos(partyname, "Fine Gael Party")

replace PPG = "ELDR" if date < td(20jul2004) & strpos(partyname, "Independent")

*Term 6
tab partyname if country == 14 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Fine Gael Party")

replace PPG = "PSE" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Labour Party")

replace PPG = "ALDE" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, " - ")

replace PPG = "UEN" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Fianna Fáil Party")

replace PPG = "GUE/NGL" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Sinn Féin")

*Term 7
tab partyname if country == 14 & date >= td(14jul2009)

replace PPG = "EPP" if date >= td(14jul2009) & strpos(partyname, "Fine Gael Party")

replace PPG = "ALDE" if date >= td(14jul2009) & strpos(partyname, "Fianna Fáil Party")

replace PPG = "GUE/NGL" if date >= td(14jul2009) & strpos(partyname, "Socialist Party")

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 14


***Italy*** 
*Assign EP political groups (PPG) to Italian parties and named MEPs
*(country == 15) by term.
*Stata evaluates & before |, so the party/name alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*Statement order is kept: where patterns overlap, the later replace wins.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 15 & date < td(20jul2004)

replace PPG = "EPP-ED" if (strpos(partyname, "Forza Italia") ///
    | strpos(partyname, "Südtiroler Volkspartei (Partito popolare sudtirolese)") ///
    | strpos(partyname, "Partito Pensionati") ///
    | strpos(partyname, "Centro cristiano democratico") ///
    | strpos(partyname, "Cristiani democratici uniti") ///
    | strpos(partyname, "Partito popolare italiano") ///
    | strpos(partyname, "Unione democratico cristiana")) ///
    & date < td(20jul2004)

replace PPG = "PSE" if (strpos(partyname, "Socialisti democratici italiani") ///
    | strpos(partyname, "Democratici di Sinistra")) ///
    & date < td(20jul2004)

replace PPG = "ELDR" if (strpos(partyname, "Italia dei Valori - Lista Di Pietro") ///
    | strpos(partyname, "Movimento Repubblicani Europei") ///
    | strpos(partyname, "Partito della rifondazione comunista") ///
    | strpos(partyname, "I Democratici") ///
    | strpos(partyname, "Partito repubblicano italiano") ///
    | strpos(partyname, "Union Valdôtaine")) ///
    & date < td(20jul2004)

replace PPG = "GUE/NGL" if (strpos(partyname, "Partito dei Comunisti Italiani") ///
    | strpos(partyname, "Partito comunista italiano")) ///
    & date < td(20jul2004)

replace PPG = "Verts/ALE" if strpos(partyname, "Federazione dei Verdi") & date < td(20jul2004)

replace PPG = "UEN" if (strpos(partyname, "Alleanza nazionale") ///
    | strpos(partyname, "Patto Segni")) ///
    & date < td(20jul2004)

replace PPG = "NI - non-attached members" if (strpos(partyname, "Lega Nord per l'indipendenza della Padania") ///
    | strpos(partyname, "Lista Emma Bonino")) ///
    & date < td(20jul2004)

replace PPG = "Technical Group of Independent Members" if (strpos(partyname, "Movimento Sociale Europeo") ///
    | strpos(partyname, "Movimento sociale fiamma tricolore")) ///
    & date < td(20jul2004)

*Term 6
tab partyname if country == 15 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Unione dei Democratici cristiani e dei Democratici di Centro") ///
    | strpos(partyname, "Südtiroler Volkspartei") ///
    | strpos(partyname, "Partito Pensionati") ///
    | strpos(partyname, "Alleanza Popolare - Unione Democratici per l'Europa") ///
    | strpos(partyname, "Forza Italia")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if (strpos(partyname, "Partito Democratico") ///
    | strpos(partyname, "Sinistra Democratica") ///
    | strpos(partyname, "Partito Socialista") ///
    | strpos(partyname, "Democratici di Sinistra") ///
    | strpos(partyname, "Socialisti democratici italiani") ///
    | strpos(partyname, "Partito Socialista Nuovo PSI")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Individual MEPs assigned by name
replace PPG = "PSE" if (strpos(name, "Giulietto Chiesa") ///
    | strpos(name, "Lilli Gruber") ///
    | strpos(name, "Mauro Zani") ///
    | strpos(name, "Michele Santoro") ///
    | strpos(name, "Achille Occhetto")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*NOTE(review): "Partito Democratico" is also in the PSE list above; because this
*statement runs later, term-6 rows with that party name end up as ALDE.
replace PPG = "ALDE" if (strpos(partyname, "Partito Democratico") ///
    | strpos(partyname, "Lista Emma Bonino") ///
    | strpos(partyname, "Indipendente - Lista Italia dei Valori - Di Pietro Occhetto") ///
    | strpos(partyname, "Uniti nell'Ulivo") ///
    | strpos(partyname, "La Margherita") ///
    | strpos(partyname, "Italia dei Valori")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "UEN" if (strpos(partyname, "Alleanza Siciliana") ///
    | strpos(partyname, "Lega Nord per l'indipendenza della Padania") ///
    | strpos(partyname, "La Destra - Alleanza Siciliana") ///
    | strpos(partyname, "Alleanza nazionale")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "UEN" if strpos(name, "Giovanni Robusti") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "Verts/ALE" if strpos(partyname, "Federazione dei Verdi") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "GUE/NGL" if (strpos(partyname, "Partito dei Comunisti Italiani") ///
    | strpos(partyname, "Partito della Rifondazione Comunista - Sinistra Europea") ///
    | strpos(partyname, "Partito della Rifondazione Comunista - Sinistra Europea (Indipendente)") ///
    | strpos(partyname, "Partito del Sud")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ELDR" if strpos(partyname, "Movimento Repubblicani Europei") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "NI - non-attached members" if (strpos(partyname, "Movimento Sociale Fiamma tricolore") ///
    | strpos(partyname, "Alternativa sociale: Lista Mussolini") ///
    | strpos(partyname, "Forza Nuova")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 15 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Unione Democratici per l'Europa") ///
    | strpos(partyname, "Fratelli d'Italia - Centrodestra Nazionale") ///
    | strpos(partyname, "Futuro e Libertà per l'Italia") ///
    | strpos(partyname, "Südtiroler Volkspartei") ///
    | strpos(partyname, "Unione dei Democratici cristiani e dei Democratici di Centro") ///
    | strpos(partyname, "Il Popolo della Libertà")) ///
    & date >= td(14jul2009)

replace PPG = "S&D" if (strpos(partyname, "Partito Democratico") ///
    | strpos(name, "Gianluca Susta")) ///
    & date >= td(14jul2009)

*NOTE(review): "Fianna Fáil Party" looks like a leftover from the Ireland
*section; kept so that results do not change.
replace PPG = "ALDE" if (strpos(partyname, "Fianna Fáil Party") ///
    | strpos(name, "Andrea Zanoni") ///
    | strpos(name, "Vincenzo Iovine") ///
    | strpos(partyname, "Italia dei Valori - Lista Di Pietro (Indipendente)") ///
    | strpos(partyname, "Italia dei Valori - Lista Di Pietro")) ///
    & date >= td(14jul2009)

replace PPG = "ECR" if strpos(partyname, "Conservatori e Social Riformatori") & date >= td(14jul2009)

replace PPG = "EFD" if (strpos(partyname, "Io amo l'Italia") ///
    | strpos(partyname, "Lega Nord")) ///
    & date >= td(14jul2009)

*NOTE(review): "Socialist Party" looks like a leftover from the Ireland section
replace PPG = "GUE/NGL" if strpos(partyname, "Socialist Party") & date >= td(14jul2009)

replace PPG = "NI - non-attached members" if strpos(partyname, "Alleanza nazionale") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 15


***Latvia*** 
*Assign EP political groups (PPG) to Latvian parties and named MEPs
*(country == 16) by term.
*Stata evaluates & before |, so the party/name alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*Statement order is kept: where patterns overlap, the later replace wins.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 16 & date < td(20jul2004)

replace PPG = "EPP-ED" if (strpos(partyname, "Jaunais laiks") ///
    | strpos(partyname, "Tautas partija") ///
    | strpos(partyname, "Latvijas Pirmā Partija")) ///
    & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Tautas Saskanas Partija") & date < td(20jul2004)

replace PPG = "Verts/ALE" if strpos(partyname, "Par cilvēka tiesībām vienotā Latvijā") & date < td(20jul2004)

replace PPG = "UEN" if strpos(partyname, "Tēvzemei un Brīvībai/LNNK") & date < td(20jul2004)

*Term 6
tab partyname if country == 16 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Jaunais laiks") ///
    | strpos(partyname, "Tautas partija") ///
    | strpos(partyname, "Pilsoniskā Savienība")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Latvijas Celš") ///
    | strpos(name, "Georgs Andrejevs")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*NOTE(review): "Pilsoniskā Savienība" is also in the EPP-ED list above; this
*later statement wins, so those term-6 rows end up as UEN.
replace PPG = "UEN" if (strpos(partyname, "Tēvzemei un Brīvībai/LNNK") ///
    | strpos(partyname, "Pilsoniskā Savienība") ///
    | strpos(name, "Guntars Krasts")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "Verts/ALE" if strpos(partyname, "Politisko organizāciju savienība") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*NOTE(review): "Sinn Féin" looks like a leftover from the Ireland section
replace PPG = "GUE/NGL" if strpos(partyname, "Sinn Féin") ///
    & date >= td(21jul2004) & date < td(14jul2009)


*Term 7
tab partyname if country == 16 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Vienotība") ///
    | strpos(partyname, "Pilsoniskā Savienība") ///
    | strpos(partyname, "Jaunais laiks")) ///
    & date >= td(14jul2009)

*NOTE(review): " - " is a substring match without a country filter, so it also
*hits any term-7 party name that merely contains " - ".
replace PPG = "S&D" if (strpos(partyname, "Alternative") ///
    | strpos(partyname, " - ")) ///
    & date >= td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Latvijas Pirmā Partija/Latvijas Ceļš") & date >= td(14jul2009)

replace PPG = "ECR" if strpos(partyname, "Tēvzemei un Brīvībai/LNNK") & date >= td(14jul2009)

replace PPG = "Greens/EFA" if (strpos(partyname, "Politisko partiju apvienība") ///
    | strpos(partyname, "Par cilvēka tiesībām vienotā Latvijā")) ///
    & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 16


***Lithuania*** 
*Assign EP political groups (PPG) to Lithuanian parties and named MEPs
*(country == 17) by term.
*Stata evaluates & before |, so the party/name alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 17 & date < td(20jul2004)

replace PPG = "EPP-ED" if strpos(partyname, "Tėvynės Sąjunga") & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Lietuvos Socialdemokratų Partija") & date < td(20jul2004)

*Term 6
tab partyname if country == 17 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if strpos(partyname, "Tėvynės sąjunga") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Lietuvos socialdemokratų partija") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ELDR" if strpos(partyname, "Liberalų demokratų partija") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Darbo partija") ///
    | strpos(partyname, "Lietuvos Respublikos liberalų sąjūdis") ///
    | strpos(partyname, "Liberalų ir centro sąjunga")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Individual MEPs assigned by name
replace PPG = "ALDE" if (strpos(name, "Ona Juknevičienė") ///
    | strpos(name, "Arūnas Degutis")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "UEN" if (strpos(partyname, "Lietuvos valstiečių liaudininkų sąjunga") ///
    | strpos(partyname, "Valstiečių ir Naujosios demokratijos partijų sąjunga")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 17 & date >= td(14jul2009)

replace PPG = "EPP" if strpos(partyname, "Tėvynės sąjunga - Lietuvos krikščionys demokratai") & date >= td(14jul2009)

replace PPG = "S&D" if strpos(partyname, "Lietuvos socialdemokratų partija") & date >= td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Darbo partija") ///
    | strpos(partyname, "Lietuvos Respublikos liberalų sąjūdis")) ///
    & date >= td(14jul2009)

replace PPG = "ECR" if strpos(partyname, "Lietuvos lenkų rinkimų akcija") & date >= td(14jul2009)

replace PPG = "EFD" if strpos(partyname, "Partija Tvarka ir teisingumas") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 17


***Luxembourg*** 
*Assign EP political groups (PPG) to Luxembourgish parties (country == 18) by term.
*Each rule combines a term date window with a substring match on partyname.

*Term 5
tab partyname if country == 18 & date < td(20jul2004)

replace PPG = "EPP-ED" if date < td(20jul2004) & strpos(partyname, "Parti chrétien social")

replace PPG = "PSE" if date < td(20jul2004) & strpos(partyname, "Parti ouvrier socialiste luxembourgeois")

replace PPG = "ELDR" if date < td(20jul2004) & strpos(partyname, "Parti démocratique")

replace PPG = "Verts/ALE" if date < td(20jul2004) & strpos(partyname, "Les Verts")

*Term 6
tab partyname if country == 18 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Parti chrétien social")

replace PPG = "PSE" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Parti ouvrier socialiste luxembourgeois")

*NOTE(review): this is a Lithuanian party name and looks like a leftover from
*the Lithuania section; it re-applies ELDR to those rows - confirm or remove.
replace PPG = "ELDR" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Liberalų demokratų partija")

replace PPG = "ALDE" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Parti démocratique")

replace PPG = "Verts/ALE" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Les Verts")

*Term 7
tab partyname if country == 18 & date >= td(14jul2009)

replace PPG = "EPP" if date >= td(14jul2009) & strpos(partyname, "Parti chrétien social luxembourgeois")

replace PPG = "S&D" if date >= td(14jul2009) & strpos(partyname, "Parti ouvrier socialiste luxembourgeois")

replace PPG = "ALDE" if date >= td(14jul2009) & strpos(partyname, "Parti démocratique")

replace PPG = "Greens/EFA" if date >= td(14jul2009) & strpos(partyname, "Déi Gréng - Les Verts")

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 18


***Malta*** 
*Assign EP political groups (PPG) to Maltese parties (country == 19) by term.
*Each rule combines a term date window with a substring match on partyname.

*Term 5
tab partyname if country == 19 & date < td(20jul2004)

replace PPG = "PSE" if date < td(20jul2004) & strpos(partyname, "Partit Laburista")

*Term 6
tab partyname if country == 19 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Partit Nazzjonalista")

replace PPG = "PSE" if date >= td(21jul2004) & date < td(14jul2009) ///
    & strpos(partyname, "Partit Laburista")

*Term 7
tab partyname if country == 19 & date >= td(14jul2009)

replace PPG = "EPP" if date >= td(14jul2009) & strpos(partyname, "Partit Nazzjonalista")

replace PPG = "S&D" if date >= td(14jul2009) & strpos(partyname, "Partit Laburista")

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 19


***Netherlands*** 
*Assign EP political groups (PPG) to Dutch parties (country == 20) by term.
*Stata evaluates & before |, so the party alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*Statement order is kept: where patterns overlap, the later replace wins.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 20 & date < td(20jul2004)

replace PPG = "EPP-ED" if strpos(partyname, "Christen Democratisch Appèl") & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Partij van de Arbeid") & date < td(20jul2004)

replace PPG = "ELDR" if (strpos(partyname, "Volkspartij voor Vrijheid en Democratie") ///
    | strpos(partyname, "Democraten 66")) ///
    & date < td(20jul2004)

replace PPG = "GUE/NGL" if strpos(partyname, "Socialistische Partij") & date < td(20jul2004)

replace PPG = "Verts/ALE" if strpos(partyname, "GroenLinks") & date < td(20jul2004)

replace PPG = "EDD" if (strpos(partyname, "ChristenUnie - Staatkundig Gereformeerde Parti") ///
    | strpos(partyname, "Staatkundig Gereformeerde Partij - Gereformeerd Politiek Verbond - Reformatorisch Politieke Federatie")) ///
    & date < td(20jul2004)

*Term 6
tab partyname if country == 20 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if strpos(partyname, "Christen Democratisch Appèl") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Partij van de Arbeid") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Volkspartij voor Vrijheid en Democratie") ///
    | strpos(partyname, "Democraten 66")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*NOTE(review): "Independent" has no country filter and matches any term-6 party
*name containing that word.
replace PPG = "Verts/ALE" if (strpos(partyname, "GroenLinks") ///
    | strpos(partyname, "Europa Transparant") ///
    | strpos(partyname, "Independent")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "GUE/NGL" if strpos(partyname, "Socialistische Partij") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "IND/DEM" if strpos(partyname, "ChristenUnie - Staatkundig Gereformeerde Parti") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 20 & date >= td(14jul2009)

replace PPG = "EPP" if strpos(partyname, "Christen Democratisch Appèl") & date >= td(14jul2009)

replace PPG = "S&D" if strpos(partyname, "Partij van de Arbeid") & date >= td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Volkspartij voor Vrijheid en Democratie") ///
    | strpos(partyname, "Democraten 66")) ///
    & date >= td(14jul2009)

replace PPG = "Greens/EFA" if strpos(partyname, "GroenLinks") & date >= td(14jul2009)

replace PPG = "ECR" if strpos(partyname, "ChristenUnie") & date >= td(14jul2009)

replace PPG = "GUE/NGL" if (strpos(partyname, "Socialistische Partij") ///
    | strpos(partyname, "Onafhankelijk lid")) ///
    & date >= td(14jul2009)

*Runs after the ECR rule, so the combined ChristenUnie-SGP name ends up as EFD
replace PPG = "EFD" if (strpos(partyname, "Staatkundig Gereformeerde Partij") ///
    | strpos(partyname, "ChristenUnie - Staatkundig Gereformeerde Parti")) ///
    & date >= td(14jul2009)

replace PPG = "NI - non-attached members" if strpos(partyname, "Partij voor de Vrijheid") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 20


***Poland*** 
*Assign EP political groups (PPG) to Polish parties and named MEPs
*(country == 21) by term.
*Stata evaluates & before |, so the party/name alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates
*(and, for the name rules, to rows whose partyname contains " - ").
*Statement order is kept: where patterns overlap, the later replace wins.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 21 & date < td(20jul2004)

replace PPG = "EPP-ED" if (strpos(partyname, "Blok Senat 2001") ///
    | strpos(partyname, "Platforma Obywatelska") ///
    | strpos(partyname, "Polskie Stronnictwo Ludowe") ///
    | strpos(partyname, "Stronnictwo Konserwatywno-Ludowe")) ///
    & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Sojusz Lewicy Demokratycznej") & date < td(20jul2004)

replace PPG = "UEN" if strpos(partyname, "Prawo i Sprawiedliwość") & date < td(20jul2004)

replace PPG = "NI - non-attached members" if (strpos(partyname, "Liga Polskich Rodzin") ///
    | strpos(partyname, "Polski Blok Ludowy") ///
    | strpos(partyname, "Ruch Katolicko-Narodowy") ///
    | strpos(partyname, "Samoobrona RP")) ///
    & date < td(20jul2004)

*Term 6
tab partyname if country == 21 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Platforma Obywatelska") ///
    | strpos(partyname, "Polskie Stronnictwo Ludowe")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if (strpos(partyname, "Sojusz Lewicy Demokratycznej - Unia Pracy") ///
    | strpos(partyname, "Socjaldemokracja Polska") ///
    | strpos(partyname, "Unia Pracy")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Partia Demokratyczna") ///
    | strpos(partyname, "Unia Wolnosci") ///
    | strpos(partyname, "Unia Wolnosci/Partia Demokratyczna - demokraci.pl")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*NOTE(review): "Polskie Stronnictwo Ludowe" is also in the EPP-ED list above;
*this later statement wins, so those term-6 rows end up as UEN.
replace PPG = "UEN" if (strpos(partyname, "Prawo i Sprawiedliwość") ///
    | strpos(partyname, "Naprzód Polsko") ///
    | strpos(partyname, "Polskie Stronnictwo Ludowe") ///
    | strpos(partyname, "Samoobrona RP") ///
    | strpos(partyname, "Forum Polskie") ///
    | strpos(partyname, "Stronnictwo Piast")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "IND/DEM" if strpos(partyname, "Liga Polskich Rodzin") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Name-based rules for term-6 rows whose partyname contains " - "
replace PPG = "EPP-ED" if strpos(name, "Krzysztof Hołowczyc") ///
    & date >= td(21jul2004) & date < td(14jul2009) & strpos(partyname, " - ")

replace PPG = "PSE" if (strpos(name, "Bogdan Golik") ///
    | strpos(name, "Dariusz Rosati")) ///
    & date >= td(21jul2004) & date < td(14jul2009) & strpos(partyname, " - ")

replace PPG = "UEN" if (strpos(name, "Ryszard Czarnecki") ///
    | strpos(name, "Andrzej Tomasz Zapałowski") ///
    | strpos(name, "Mirosław Piotrowski") ///
    | strpos(name, "Bogdan Pęk") ///
    | strpos(name, "Dariusz Grabowski")) ///
    & date >= td(21jul2004) & date < td(14jul2009) & strpos(partyname, " - ")

replace PPG = "NI - non-attached members" if (strpos(name, "Bernard Piotr Wojciechowski") ///
    | strpos(name, "Marek Czarnecki")) ///
    & date >= td(21jul2004) & date < td(14jul2009) & strpos(partyname, " - ")

replace PPG = "IND/DEM" if strpos(name, "Urszula Krupa") ///
    & date >= td(21jul2004) & date < td(14jul2009) & strpos(partyname, " - ")

*Term 7
tab partyname if country == 21 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Platforma Obywatelska") ///
    | strpos(partyname, "Polskie Stronnictwo Ludowe")) ///
    & date >= td(14jul2009)

replace PPG = "S&D" if (strpos(partyname, "Sojusz Lewicy Demokratycznej") ///
    | strpos(partyname, "Unia Pracy")) ///
    & date >= td(14jul2009)

replace PPG = "ECR" if (strpos(partyname, "Prawo i Sprawiedliwość") ///
    | strpos(partyname, "Polska Jest Najważniejsza") ///
    | strpos(partyname, "Niezależny")) ///
    & date >= td(14jul2009)

replace PPG = "EFD" if strpos(partyname, "Solidarna Polska") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 21


***Portugal*** 
*Assign EP political groups (PPG) to Portuguese parties (country == 22) by term.
*Stata evaluates & before |, so the party alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*Statement order is kept: where patterns overlap, the later replace wins.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 22 & date < td(20jul2004)

replace PPG = "EPP-ED" if strpos(partyname, "Partido Social Democrata") & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Partido Socialista") & date < td(20jul2004)

replace PPG = "GUE/NGL" if strpos(partyname, "Partido Comunista Português") & date < td(20jul2004)

replace PPG = "UEN" if strpos(partyname, "Partido Popular") & date < td(20jul2004)

*Term 6
tab partyname if country == 22 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Partido Social Democrata") ///
    | strpos(partyname, "Partido Popular") ///
    | strpos(partyname, "Coligaçao Força Portugal (PPD/PSD.CDS-PP)")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Partido Socialista") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "GUE/NGL" if (strpos(partyname, "Bloco de Esquerda") ///
    | strpos(partyname, "Partido Comunista Português") ///
    | strpos(partyname, "Coligação Democrática Unitária (PCP-PEV)")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 22 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Partido Popular") ///
    | strpos(partyname, "Partido Social Democrata")) ///
    & date >= td(14jul2009)

*NOTE(review): "Parti socialiste" looks like a leftover from the France section
replace PPG = "S&D" if (strpos(partyname, "Partido Socialista") ///
    | strpos(partyname, "Parti socialiste")) ///
    & date >= td(14jul2009)

replace PPG = "GUE/NGL" if (strpos(partyname, "Bloco de Esquerda (Independente)") ///
    | strpos(partyname, "Partido Comunista Português") ///
    | strpos(partyname, "Bloco de Esquerda") ///
    | strpos(partyname, "Coligação Democrática Unitária (PCP-PEV)")) ///
    & date >= td(14jul2009)

*Runs last, so "Bloco de Esquerda (Independente)" rows end up as Greens/EFA
replace PPG = "Greens/EFA" if strpos(partyname, "Independente") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 22


***Romania*** 
*Assign EP political groups (PPG) to Romanian parties (country == 23), terms 6-7.
*Stata evaluates & before |, so the party alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*NOTE(review): term 6 starts at 21jul2004 here; 20jul2004 is not covered.

*Term 6
tab partyname if country == 23 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Partidul Democrat-Liberal") ///
    | strpos(partyname, "Uniunea Democrată Maghiară din România") ///
    | strpos(partyname, "Forumul Democrat al Germanitor din România") ///
    | strpos(partyname, "Partidul Democrat") ///
    | strpos(partyname, "Partidul Liberal Democrat") ///
    | strpos(partyname, "Romániai Magyar Demokrata Szövetség") ///
    | strpos(partyname, "Independent (Romania)")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Partidul Social Democrat") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Partidul Naţional Liberal") ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ECR" if strpos(partyname, "Partidul Conservator") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*NOTE(review): " - " is a substring match without a country filter, so it also
*hits any term-6 party name that merely contains " - ".
replace PPG = "NI - non-attached members" if (strpos(partyname, "Partidul România Mare") ///
    | strpos(partyname, " - ")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 23 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Partidul Democrat-Liberal") ///
    | strpos(partyname, "Uniunea Democrată Maghiară din România") ///
    | strpos(partyname, "Uniunea Democrată Maghiară din România (Romania)")) ///
    & date >= td(14jul2009)

replace PPG = "S&D" if (strpos(partyname, "Partidul Social Democrat + Partidul Conservator") ///
    | strpos(partyname, "Partidul Social Democrat")) ///
    & date >= td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Partidul Naţional Liberal") & date >= td(14jul2009)

*Term-7 rule: the date window previously repeated the term-6 range, which left
*term-7 rows of this party without a term-7 assignment.
replace PPG = "NI - non-attached members" if strpos(partyname, "Partidul România Mare") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 23


***Slovakia*** 
*Assign EP political groups (PPG) to Slovak parties (country == 24) by term.
*Stata evaluates & before |, so the party alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 24 & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Smer") & date < td(20jul2004)

replace PPG = "UEN" if strpos(partyname, "Ľudová únia") & date < td(20jul2004)

replace PPG = "NI - non-attached members" if strpos(partyname, "Hnutie za demokratické Slovensko") & date < td(20jul2004)

*Term 6
tab partyname if country == 24 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Strana mad'arskej koalície - Magyar Koalíció Pártja") ///
    | strpos(partyname, "Kresťanskodemokratické hnutie") ///
    | strpos(partyname, "Slovenská demokratická a kresťanská únia")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if (strpos(partyname, "Smer") ///
    | strpos(partyname, "SMER-Sociálna demokracia") ///
    | strpos(partyname, "Strana demokratickej ľavice")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "NI - non-attached members" if strpos(partyname, "Hnutie za demokratické Slovensko") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 24 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Kresťanskodemokratické hnutie") ///
    | strpos(partyname, "Slovenská demokratická a kresťanská únia - Demokratická strana") ///
    | strpos(partyname, "Strana mad'arskej komunity- Magyar Közösség Pártja")) ///
    & date >= td(14jul2009)

replace PPG = "S&D" if strpos(partyname, "SMER-Sociálna demokracia") & date >= td(14jul2009)

replace PPG = "EFD" if strpos(partyname, "Slovenská národná strana") & date >= td(14jul2009)

*Term-7 rule: the date window previously repeated the term-6 range, which
*overrode the term-6 NI assignment above and left term-7 rows unassigned here.
replace PPG = "ALDE" if strpos(partyname, "Ľudová strana - Hnutie za demokratické Slovensko") & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 24


***Slovenia*** 
*Assign EP political groups (PPG) to Slovenian parties (country == 25) by term.
*Stata evaluates & before |, so the party alternatives are wrapped in
*parentheses; without them only the last strpos() is limited to the term dates.
*NOTE(review): 20jul2004 falls in neither the term-5 nor the term-6 window.

*Term 5
tab partyname if country == 25 & date < td(20jul2004)

replace PPG = "EPP-ED" if (strpos(partyname, "Nova Slovenija") ///
    | strpos(partyname, "Slovenska Ljudska Stranka") ///
    | strpos(partyname, "Socialdemokratska stranka Slovenije")) ///
    & date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Združena lista socialnih demokratov") & date < td(20jul2004)

replace PPG = "ELDR" if strpos(partyname, "Liberalna Demokracija Slovenije") & date < td(20jul2004)

*NOTE(review): this is a Slovak party name and looks like a leftover from the
*Slovakia section.
replace PPG = "NI - non-attached members" if strpos(partyname, "Hnutie za demokratické Slovensko") & date < td(20jul2004)

*Term 6
tab partyname if country == 25 & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Nova Slovenija") ///
    | strpos(partyname, "Slovenska demokratska stranka")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "PSE" if (strpos(partyname, "Socialni demokrati") ///
    | strpos(partyname, "Združena lista socialnih demokratov")) ///
    & date >= td(21jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Liberalna Demokracija Slovenije") ///
    & date >= td(21jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 25 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Nova Slovenija - Krščanska ljudska stranka") ///
    | strpos(partyname, "Slovenska demokratska stranka")) ///
    & date >= td(14jul2009)

replace PPG = "S&D" if strpos(partyname, "Socialni demokrati") & date >= td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "ZARES-Nova Politika") ///
    | strpos(partyname, "Liberalna Demokracija Slovenije")) ///
    & date >= td(14jul2009)

*Visual check: rows for this country that still carry the placeholder "NA"
br if PPG == "NA" & country == 25


***Spain*** 
*National party -> EP group, per parliamentary term.
*In Stata "&" is evaluated before "|". Each list of alternative party names is
*therefore wrapped in parentheses; without them the date window would only
*restrict the last party in the list.
*Term 5
tab partyname if country == 26 & date < td(20jul2004)

replace PPG = "EPP-ED" if (strpos(partyname, "Unió Democràtica de Catalunya") ///
	| strpos(partyname, "Partido Popular")) ///
	& date < td(20jul2004)

*Term 5 socialists are labelled "PSE", as in the other term 5 rules
*(the group label "S&D" is only used for term 7).
replace PPG = "PSE" if (strpos(partyname, "Partit dels Socialistes de Catalunya") ///
	| strpos(partyname, "Partido Socialista Obrero Español (Progresistas)") ///
	| strpos(partyname, "Partido Socialista Obrero Español") ///
	| strpos(partyname, "Nueva Izquierda")) ///
	& date < td(20jul2004)

replace PPG = "ELDR" if (strpos(partyname, "Coalicion Canaria") ///
	| strpos(partyname, "Convergència Democràtica Catalunya")) ///
	& date < td(20jul2004)

replace PPG = "Greens/EFA" if (strpos(partyname, "Eusko Alkartasuna") ///
	| strpos(partyname, "Partido Andalucista")) ///
	& date < td(20jul2004)

replace PPG = "GUE/NGL" if strpos(partyname, "Izquierda Unida") & date < td(20jul2004)

replace PPG = "Verts/ALE" if (strpos(partyname, "Partido Aragones") ///
	| strpos(partyname, "Bloc Nacionalista Valencia") ///
	| strpos(partyname, "Bloque Nacionalista Galego") ///
	| strpos(partyname, "Esquerra Republicana de Catalunya") ///
	| strpos(partyname, "Partido Nacionalista Vasco")) ///
	& date < td(20jul2004)

replace PPG = "NI - non-attached members" if strpos(partyname, "Euskal Herritarrok") & date < td(20jul2004) 

*Term 6 (starts 20jul2004 so that no day falls between term 5 and term 6)
tab partyname if country == 26 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Partido Popular") ///
	| strpos(partyname, "Unión del Pueblo Navarro")) ///
	& date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "PSE" if (strpos(partyname, "Partido Socialista Obrero Español") ///
	| strpos(partyname, "Partit dels Socialistes de Catalunya")) ///
	& date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Convergència Democràtica Catalunya") ///
	| strpos(partyname, "Partido Nacionalista Vasco")) ///
	& date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "Verts/ALE" if (strpos(partyname, "Los Verdes") ///
	| strpos(partyname, "Iniciativa Per Catalunya Verds - Esquerra Unida í Alternativa") ///
	| strpos(partyname, "Eusko Alkartasuna") ///
	| strpos(partyname, "Europa de los Pueblos")) ///
	& date >= td(20jul2004) & date < td(14jul2009)

*Runs after the Verts/ALE rule on purpose: a party name that contains both
*"Izquierda Unida" and the Iniciativa string ends up as GUE/NGL.
replace PPG = "GUE/NGL" if (strpos(partyname, "Izquierda Unida") ///
	| strpos(partyname, "Izquierda Unida - Iniciativa Per Catalunya Verds - Esquerra Unida í Alternativa")) ///
	& date >= td(20jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 26 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Unió Democràtica de Catalunya") ///
	| strpos(partyname, "Partido Popular")) ///
	& date >= td(14jul2009)

replace PPG = "S&D" if (strpos(partyname, "Partido Socialista Obrero Español") ///
	| strpos(partyname, "Partit dels Socialistes de Catalunya")) ///
	& date >= td(14jul2009)

replace PPG = "Greens/EFA" if (strpos(partyname, "Aralar") ///
	| strpos(partyname, "Iniciativa per Catalunya Verds") ///
	| strpos(partyname, "Bloque Nacionalista Galego") ///
	| strpos(partyname, "Esquerra Republicana de Catalunya")) ///
	& date >= td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Convergència Democràtica de Catalunya") ///
	| strpos(partyname, "Partido Nacionalista Vasco")) ///
	& date >= td(14jul2009)

replace PPG = "GUE/NGL" if strpos(partyname, "Izquierda Unida") & date >= td(14jul2009)

replace PPG = "NI - non-attached members" if strpos(partyname, "Unión, Progreso y Democracia") & date >= td(14jul2009)	

*Visual check: Spanish rows that still have no group
br if PPG == "NA" & country == 26


***Sweden*** 
*National party -> EP group, per parliamentary term.
*In Stata "&" is evaluated before "|". Each list of alternative party names is
*therefore wrapped in parentheses; without them the date window would only
*restrict the last party in the list.
*Term 5
tab partyname if country == 27 & date < td(20jul2004)

replace PPG = "EPP-ED" if (strpos(partyname, "Kristdemokraterna") ///
	| strpos(partyname, "Moderata samlingspartiet")) ///
	& date < td(20jul2004)

replace PPG = "PSE" if strpos(partyname, "Socialdemokratiska arbetarepartiet") & date < td(20jul2004)

replace PPG = "ELDR" if (strpos(partyname, "Centerpartiet") ///
	| strpos(partyname, "Folkpartiet liberalerna")) ///
	& date < td(20jul2004)

replace PPG = "GUE/NGL" if strpos(partyname, "Vänsterpartiet") & date < td(20jul2004)

replace PPG = "Verts/ALE" if strpos(partyname, "Miljöpartiet") & date < td(20jul2004) 

*Term 6 (starts 20jul2004 so that no day falls between term 5 and term 6)
tab partyname if country == 27 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if (strpos(partyname, "Moderata Samlingspartiet") ///
	| strpos(partyname, "Kristdemokraterna")) ///
	& date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Arbetarepartiet- Socialdemokraterna") & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Centerpartiet") ///
	| strpos(partyname, "Folkpartiet liberalerna")) ///
	& date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "Verts/ALE" if strpos(partyname, "Miljöpartiet") & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "GUE/NGL" if strpos(partyname, "Vänsterpartiet") & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "IND/DEM" if strpos(partyname, "Junilistan") & date >= td(20jul2004) & date < td(14jul2009)

*Term 7
tab partyname if country == 27 & date >= td(14jul2009)

replace PPG = "EPP" if (strpos(partyname, "Kristdemokraterna") ///
	| strpos(partyname, "Moderata Samlingspartiet")) ///
	& date >= td(14jul2009)

replace PPG = "S&D" if strpos(partyname, "Arbetarepartiet- Socialdemokraterna") & date >= td(14jul2009)

replace PPG = "ALDE" if (strpos(partyname, "Centerpartiet") ///
	| strpos(partyname, "Folkpartiet liberalerna")) ///
	& date >= td(14jul2009)

replace PPG = "Greens/EFA" if (strpos(partyname, "Miljöpartiet de gröna") ///
	| strpos(partyname, "Piratpartiet")) ///
	& date >= td(14jul2009)

replace PPG = "GUE/NGL" if strpos(partyname, "Vänsterpartiet") & date >= td(14jul2009)

*Visual check: Swedish rows that still have no group
br if PPG == "NA" & country == 27


***United Kingdom*** 
*National party -> EP group, per parliamentary term.
*Term 5
tab partyname if country == 28 & date < td(20jul2004)

replace PPG = "EPP-ED" if strpos(partyname, "Conservative and Unionist Party") & date < td(20jul2004) 

replace PPG = "PSE" if strpos(partyname, "Labour Party") & date < td(20jul2004)

replace PPG = "ELDR" if strpos(partyname, "Liberal Democrat Party") & date < td(20jul2004)

*Term 6
*The window starts on 20jul2004: term 5 rules use date < td(20jul2004), so a
*lower bound of 21jul2004 would leave rows dated 20jul2004 without any rule.
tab partyname if country == 28 & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "EPP-ED" if strpos(partyname, "Conservative and Unionist Party") & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "PSE" if strpos(partyname, "Labour Party") & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Liberal Democrat Party") & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "Verts/ALE" if strpos(partyname, "Green Party") & date >= td(20jul2004) & date < td(14jul2009)

replace PPG = "GUE/NGL" if strpos(partyname, "Sinn Féin") & date >= td(20jul2004) & date < td(14jul2009)

*Term 7
*NOTE(review): no term 7 rule for "Labour Party" is present in this section;
*such rows, if any, presumably show up in the browse below - confirm.
tab partyname if country == 28 & date >= td(14jul2009)

replace PPG = "ALDE" if strpos(partyname, "Liberal Democrats Party") & date >= td(14jul2009)

replace PPG = "ECR" if strpos(partyname, "Conservative Party") & date >= td(14jul2009)

replace PPG = "GUE/NGL" if strpos(partyname, "Sinn Féin") & date >= td(14jul2009)

replace PPG = "EFD" if strpos(partyname, "United Kingdom Independence Party") & date >= td(14jul2009)

*Visual check: UK rows that still have no group
br if PPG == "NA" & country == 28

*Visual check: rows from any country that still have no group
br if PPG == "NA"

*(5) Dummies for the PPG families and the share of women in each group
/* Group families follow:
https://en.wikipedia.org/wiki/Political_groups_of_the_European_Parliament#Christian_democrats_and_conservatives */
/* Shares of women per group are taken from:
https://www.europarl.europa.eu/at-your-service/files/be-heard/eurobarometer/2019/review_of_european_and_national_election_results_2019/incoming_ep_2019.pdf */

tabulate PPG

generate women_ppg = .
label variable women_ppg "Average Percentage of Women in PPGs per term"

*EPP-ED & EPP
*One label per period: "EPP-ED" before 14jul2009, "EPP" from then on.
*Any PPG value containing "EPP-ED" also contains "EPP", so one test suffices.
replace PPG = cond(date < td(14jul2009), "EPP-ED", "EPP") if strpos(PPG, "EPP") > 0

*0/1 indicator for the Christian-democrat / conservative family
generate conservatives = inlist(PPG, "EPP-ED", "EPP")

label variable conservatives "Christian democrats and conservatives"
label define cdc 0 "No" 1 "Yes"
label values conservatives cdc

*Share of women in the group, by date window
replace women_ppg = 25.75 if PPG == "EPP-ED" & date < td(10aug2004)
replace women_ppg = 23.13 if PPG == "EPP-ED" & date >= td(10aug2004) & date < td(25mar2009)
replace women_ppg = 24.31 if inlist(PPG, "EPP-ED", "EPP") & date >= td(25mar2009) & date < td(20jul2009)
replace women_ppg = 33.58 if PPG == "EPP" & date >= td(20jul2009)
	

*PSE & S&D
*One label per period: "PSE" before 20jul2009, "S&D" from then on.
replace PPG = cond(date < td(20jul2009), "PSE", "S&D") if strpos(PPG, "PSE") | strpos(PPG, "S&D")

*0/1 indicator for the social-democratic family
generate social_democrats = inlist(PPG, "PSE", "S&D")

label variable social_democrats "Social democrats"
label define sd 0 "No" 1 "Yes"
label values social_democrats sd

*Share of women in the group, by date window
replace women_ppg = 36.11 if PPG == "PSE" & date < td(10aug2004)
replace women_ppg = 39.9 if PPG == "PSE" & date >= td(10aug2004) & date < td(25mar2009)
replace women_ppg = 41.47 if PPG == "PSE" & date >= td(25mar2009) & date < td(20jul2009)
replace women_ppg = 40.22 if PPG == "S&D" & date >= td(20jul2009)


*ELDR & ALDE
*One label per period: "ELDR" before 10aug2004, "ALDE" from then on.
replace PPG = cond(date < td(10aug2004), "ELDR", "ALDE") if strpos(PPG, "ELDR") | strpos(PPG, "ALDE")

*0/1 indicator for the liberal / centrist family
generate liberals_centrists = inlist(PPG, "ALDE", "ELDR")

label variable liberals_centrists "Liberals and centrists"
label define lc 0 "No" 1 "Yes"
label values liberals_centrists lc

*Share of women in the group, by date window
replace women_ppg = 34 if PPG == "ELDR" & date < td(10aug2004)
replace women_ppg = 38.64 if inlist(PPG, "ELDR", "ALDE") & date >= td(10aug2004) & date < td(25mar2009)
replace women_ppg = 40 if PPG == "ALDE" & date >= td(25mar2009) & date < td(20jul2009)
replace women_ppg = 45.24 if PPG == "ALDE" & date >= td(20jul2009)


*UEN
*Before 20jul2009 both the "UEN" and the "ECR" label are stored as "UEN".
replace PPG = "UEN" if (strpos(PPG, "UEN") | strpos(PPG, "ECR")) & date < td(20jul2009)

*0/1 indicator for UEN membership
generate uen = (PPG == "UEN")

label variable uen "Union for Europe of the Nations"
label define ue 0 "No" 1 "Yes"
label values uen ue

*Share of women in the group, by date window
replace women_ppg = 23.33 if PPG == "UEN" & date < td(10aug2004)
replace women_ppg = 18.52 if PPG == "UEN" & date >= td(10aug2004) & date < td(25mar2009)
replace women_ppg = 11.36 if PPG == "UEN" & date >= td(25mar2009) & date < td(20jul2009)


*ECR 
*From 20jul2009 on both the "ECR" and the "UEN" label are stored as "ECR".
replace PPG = "ECR" if (strpos(PPG, "UEN") | strpos(PPG, "ECR")) & date >= td(20jul2009)

*0/1 indicator for ECR membership
generate ecr = (PPG == "ECR")

label variable ecr "European Conservatives and Reformists"
label define ec 0 "No" 1 "Yes"
label values ecr ec

*Share of women in the group
replace women_ppg = 12.96 if date >= td(20jul2009) & PPG == "ECR"


*Greens/EFA
*"Verts/ALE" and "Greens/EFA" are merged under the single label "Greens/EFA".
replace PPG = "Greens/EFA" if strpos(PPG, "Verts/ALE") | strpos(PPG, "Greens/EFA")

*0/1 indicator for Greens/EFA membership
generate greens = (PPG == "Greens/EFA")

label variable greens "Greens/European Free Alliance"
label define gr 0 "No" 1 "Yes"
label values greens gr

*Share of women in the group, by date window
replace women_ppg = 41.67 if PPG == "Greens/EFA" & date < td(10aug2004)
replace women_ppg = 47.62 if PPG == "Greens/EFA" & date >= td(10aug2004) & date < td(25mar2009)
replace women_ppg = 46.51 if PPG == "Greens/EFA" & date >= td(25mar2009) & date < td(20jul2009)
replace women_ppg = 54.55 if PPG == "Greens/EFA" & date >= td(20jul2009)


*GUE/NGL
*0/1 indicator for GUE/NGL membership (the label is the same in all windows)
generate gue_ngl = (PPG == "GUE/NGL")

label variable gue_ngl "European United Left/Nordic Green Left"
label define com 0 "No" 1 "Yes"
label values gue_ngl com

*Share of women in the group, by date window
replace women_ppg = 35.71 if PPG == "GUE/NGL" & date < td(10aug2004)
replace women_ppg = 29.27 if PPG == "GUE/NGL" & date >= td(10aug2004) & date < td(20jul2009)
replace women_ppg = 28.57 if PPG == "GUE/NGL" & date >= td(20jul2009)


*Eurosceptics (EDD, IND/DEM, EFD)
*The label depends on the date window: "EDD" before 10aug2004, "IND/DEM" from
*10aug2004 until 19jul2009, "EFD" from 20jul2009 on. Each rule relabels rows
*that carry one of the other two labels inside its window.
replace PPG = "EDD" if (strpos(PPG, "EFD") | strpos(PPG, "IND/DEM")) & date < td(10aug2004)
replace PPG = "IND/DEM" if (strpos(PPG, "EDD") | strpos(PPG, "EFD")) & date >= td(10aug2004) & date < td(20jul2009)
replace PPG = "EFD" if (strpos(PPG, "IND/DEM") | strpos(PPG, "EDD")) & date >= td(20jul2009)

*0/1 indicator for the eurosceptic family
generate eurosceptics = inlist(PPG, "EDD", "EFD", "IND/DEM")

label variable eurosceptics "Eurosceptics (EDD, IND/DEM, EFD)"
label define eur 0 "No" 1 "Yes"
label values eurosceptics eur

*Share of women in the group, by date window
replace women_ppg = 12.5 if PPG == "EDD" & date < td(10aug2004)
replace women_ppg = 8.11 if PPG == "IND/DEM" & date >= td(10aug2004) & date < td(25mar2009)
replace women_ppg = 18.18 if PPG == "IND/DEM" & date >= td(25mar2009) & date < td(20jul2009)
replace women_ppg = 15.63 if PPG == "EFD" & date >= td(20jul2009)


*Independent (Non-attached & TDI)
*0/1 indicator for members outside the regular groups
generate independent = inlist(PPG, "NI - non-attached members", "Technical Group of Independent Members")

label variable independent "Independent Members (incl. TI in Term 5)"
label define ind 0 "No" 1 "Yes"
label values independent ind

*Share of women, by label and date window
replace women_ppg = 5.56 if PPG == "Technical Group of Independent Members" & date < td(10aug2004)
replace women_ppg = 22.22 if PPG == "NI - non-attached members" & date < td(10aug2004)
replace women_ppg = 24.14 if PPG == "NI - non-attached members" & date >= td(10aug2004) & date < td(25mar2009)
replace women_ppg = 13.33 if PPG == "NI - non-attached members" & date >= td(25mar2009) & date < td(20jul2009)
replace women_ppg = 18.52 if PPG == "NI - non-attached members" & date >= td(20jul2009)

*Visual check: rows for which no share of women was assigned
browse if women_ppg == .

*0/1 indicator: PPG is one of the eurosceptic, UEN/ECR or EPP-ED/EPP labels
generate conservative_ideology = inlist(PPG, "EDD", "EFD", "IND/DEM", "ECR", "UEN", "EPP-ED", "EPP")

label variable conservative_ideology "PPGs with a conservative ideology"
label define consid 0 "No" 1 "Yes"
label values conservative_ideology consid

*(6) Delegations
/*Data from: https://www.europarl.europa.eu/election-results-2019/en/mep-gender-balance/2014-2019/ */
*Share of women in each country delegation. The value changes at 20jul2004
*and at 14jul2009; countries with only two values change at 14jul2009 only.
*cond() picks the value for the window the speech date falls into.
generate women_del = .
label variable women_del "Yearly Average of Women in Country Delegations (in %)"
*Austria 1
replace women_del = cond(date < td(20jul2004), 38, cond(date < td(14jul2009), 28, 41)) if country == 1
*Belgium 2
replace women_del = cond(date < td(20jul2004), 28, cond(date < td(14jul2009), 33, 36)) if country == 2
*Bulgaria 3
replace women_del = cond(date < td(14jul2009), 44, 41) if country == 3
*Croatia 4 (single value)
replace women_del = 33 if country == 4
*Cyprus 5
replace women_del = cond(date < td(14jul2009), 0, 33) if country == 5
*Czech Republic 6
replace women_del = cond(date < td(14jul2009), 21, 18) if country == 6
*Denmark 7
replace women_del = cond(date < td(20jul2004), 38, cond(date < td(14jul2009), 43, 46)) if country == 7
*Estonia 8 (same value before and after 14jul2009)
replace women_del = 50 if country == 8
*Finland 9
replace women_del = cond(date < td(20jul2004), 44, cond(date < td(14jul2009), 43, 62)) if country == 9
*France 10
replace women_del = cond(date < td(20jul2004), 40, cond(date < td(14jul2009), 45, 44)) if country == 10
*Germany 11
replace women_del = cond(date < td(20jul2004), 37, cond(date < td(14jul2009), 33, 37)) if country == 11
*Greece 12
replace women_del = cond(date < td(20jul2004), 16, cond(date < td(14jul2009), 29, 32)) if country == 12
*Hungary 13
replace women_del = cond(date < td(14jul2009), 37, 36) if country == 13
*Ireland 14
replace women_del = cond(date < td(20jul2004), 33, cond(date < td(14jul2009), 38, 15)) if country == 14
*Italy 15 (same value from 20jul2004 on)
replace women_del = cond(date < td(20jul2004), 11, 21) if country == 15
*Latvia 16
replace women_del = cond(date < td(14jul2009), 33, 38) if country == 16
*Lithuania 17
replace women_del = cond(date < td(14jul2009), 38, 25) if country == 17
*Luxembourg 18
replace women_del = cond(date < td(20jul2004), 33, cond(date < td(14jul2009), 50, 17)) if country == 18
*Malta 19 (same value before and after 14jul2009)
replace women_del = 0 if country == 19
*Netherlands 20 (same value from 20jul2004 on)
replace women_del = cond(date < td(20jul2004), 35, 48) if country == 20
*Poland 21
replace women_del = cond(date < td(14jul2009), 15, 22) if country == 21
*Portugal 22
replace women_del = cond(date < td(20jul2004), 20, cond(date < td(14jul2009), 25, 36)) if country == 22
*Romania 23
replace women_del = cond(date < td(14jul2009), 29, 36) if country == 23
*Slovakia 24
replace women_del = cond(date < td(14jul2009), 36, 38) if country == 24
*Slovenia 25
replace women_del = cond(date < td(14jul2009), 43, 29) if country == 25
*Spain 26
replace women_del = cond(date < td(20jul2004), 35, cond(date < td(14jul2009), 25, 36)) if country == 26
*Sweden 27
replace women_del = cond(date < td(20jul2004), 41, cond(date < td(14jul2009), 47, 56)) if country == 27
*United Kingdom 28
replace women_del = cond(date < td(20jul2004), 24, cond(date < td(14jul2009), 26, 33)) if country == 28

*(7) National Representation 
/*Data from: Parline database on national parliaments (https://data.ipu.org)*/
*Start from missing, like women_ppg and women_del. With a start value of 0 any
*row not matched by a country/date rule below would be recorded as a
*parliament with 0 % women instead of "no information".
gen women_np = .
label var women_np "Yearly Average of Women in National Parliaments (in %)"

*Each rule assigns the share of women in the national parliament to the rows
*of one country whose date lies in [lower bound, upper bound).

*Austria (country 1)
replace women_np = 26.23 if country == 1 & date < td(1jan2000)
replace women_np = 26.78 if country == 1 & date >= td(1jan2000) & date < td(1jan2002)
replace women_np = 33.88 if country == 1 & date >= td(1jan2002) & date < td(1jan2006)
replace women_np = 32.24 if country == 1 & date >= td(1jan2006) & date < td(1jan2007)
replace women_np = 32.79 if country == 1 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 27.87 if country == 1 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 28.42 if country == 1 & date >= td(1jan2009) & date < td(1jan2010)
replace women_np = 27.87 if country == 1 & date >= td(1jan2010) & date < td(1jan2012)
replace women_np = 33.33 if country == 1 & date >= td(1jan2012) & date < td(1jan2014)

*Belgium (country 2)
replace women_np = 23.33 if country == 2 & date < td(1jan2003)
replace women_np = 35.33 if country == 2 & date >= td(1jan2003) & date < td(1jan2008)
replace women_np = 36.67 if country == 2 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 38 if country == 2 & date >= td(1jan2009) & date < td(1jan2010)
replace women_np = 39.33 if country == 2 & date >= td(1jan2010) & date < td(1jan2016)

*Bulgaria (country 3)
replace women_np = 10.83 if country == 3 & date < td(1jan2001)
replace women_np = 26.25 if country == 3 & date >= td(1jan2001) & date < td(1jan2005)
replace women_np = 20.83 if country == 3 & date >= td(1jan2005) & date < td(1jan2007)
replace women_np = 21.67 if country == 3 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 20.83 if country == 3 & date >= td(1jan2008) & date < td(1jan2013)
replace women_np = 24.58 if country == 3 & date >= td(1jan2013) & date < td(1jan2014)

*Croatia (country 4)
replace women_np = 14.93 if country == 4 & date < td(1jan2000)
replace women_np = 20.53 if country == 4 & date >= td(1jan2000) & date < td(1jan2003)
replace women_np = 17.76 if country == 4 & date >= td(1jan2003) & date < td(1jan2007)
replace women_np = 20.92 if country == 4 & date >= td(1jan2007) & date < td(1jan2009)
replace women_np = 23.53 if country == 4 & date >= td(1jan2009) & date < td(1jan2011)
replace women_np = 23.84 if country == 4 & date >= td(1jan2011) & date < td(1jan2012)
replace women_np = 21.19 if country == 4 & date >= td(1jan2012) & date < td(1jan2013)
replace women_np = 23.84 if country == 4 & date >= td(1jan2013) & date < td(1jan2014)

*Cyprus (country 5)
replace women_np = 5.36 if country == 5 & date < td(1jan2001)
replace women_np = 10.71 if country == 5 & date >= td(1jan2001) & date < td(1jan2006)
replace women_np = 14.29 if country == 5 & date >= td(1jan2006) & date < td(1jan2010)
replace women_np = 12.5 if country == 5 & date >= td(1jan2010) & date < td(1jan2011)
replace women_np = 10.71 if country == 5 & date >= td(1jan2011) & date < td(1jan2014)

*Each rule assigns the share of women in the national parliament to the rows
*of one country whose date lies in [lower bound, upper bound).

*Czech Republic (country 6)
*NOTE(review): the 2007 and 2008 values (5.5 and 7) are far below the
*neighbouring years (15.5) - confirm against the Parline source.
replace women_np = 15 if country == 6 & date < td(1jan2002)
replace women_np = 17 if country == 6 & date >= td(1jan2002) & date < td(1jan2006)
replace women_np = 15.5 if country == 6 & date >= td(1jan2006) & date < td(1jan2007)
replace women_np = 5.5 if country == 6 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 7 if country == 6 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 15.5 if country == 6 & date >= td(1jan2009) & date < td(1jan2010)
replace women_np = 22 if country == 6 & date >= td(1jan2010) & date < td(1jan2013)
replace women_np = 19.5 if country == 6 & date >= td(1jan2013) & date < td(1jan2014)

*Denmark (country 7)
replace women_np = 37.43 if country == 7 & date < td(1jan2001)
replace women_np = 37.99 if country == 7 & date >= td(1jan2001) & date < td(1jan2005)
replace women_np = 36.87 if country == 7 & date >= td(1jan2005) & date < td(1jan2007)
replace women_np = 37.43 if country == 7 & date >= td(1jan2007) & date < td(1jan2010)
replace women_np = 37.99 if country == 7 & date >= td(1jan2010) & date < td(1jan2011)
replace women_np = 39.11 if country == 7 & date >= td(1jan2011) & date < td(1jan2014)

*Estonia (country 8)
replace women_np = 17.82 if country == 8 & date < td(1jan2003)
replace women_np = 18.81 if country == 8 & date >= td(1jan2003) & date < td(1jan2007)
replace women_np = 20.79 if country == 8 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 23.76 if country == 8 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 22.77 if country == 8 & date >= td(1jan2009) & date < td(1jan2011)
replace women_np = 18.81 if country == 8 & date >= td(1jan2011) & date < td(1jan2014)

*Finland (country 9)
replace women_np = 37 if country == 9 & date < td(1jan2000)
replace women_np = 36.5 if country == 9 & date >= td(1jan2000) & date < td(1jan2001)
replace women_np = 37 if country == 9 & date >= td(1jan2001) & date < td(1jan2003)
replace women_np = 37.5 if country == 9 & date >= td(1jan2003) & date < td(1jan2006)
replace women_np = 38 if country == 9 & date >= td(1jan2006) & date < td(1jan2007)
replace women_np = 41.5 if country == 9 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 42 if country == 9 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 40 if country == 9 & date >= td(1jan2009) & date < td(1jan2011)
replace women_np = 42.5 if country == 9 & date >= td(1jan2011) & date < td(1jan2015)

*France (country 10)
replace women_np = 10.92 if country == 10 & date < td(1jan2002)
replace women_np = 12.37 if country == 10 & date >= td(1jan2002) & date < td(1jan2007)
replace women_np = 18.54 if country == 10 & date >= td(1jan2007) & date < td(1jan2010)
replace women_np = 18.89 if country == 10 & date >= td(1jan2010) & date < td(1jan2011)
replace women_np = 18.54 if country == 10 & date >= td(1jan2011) & date < td(1jan2012)
replace women_np = 26.86 if country == 10 & date >= td(1jan2012) & date < td(1jan2014)

*Germany (country 11)
replace women_np = 30.94 if country == 11 & date < td(1jan2002)
replace women_np = 32.17 if country == 11 & date >= td(1jan2002) & date < td(1jan2005)
replace women_np = 31.76 if country == 11 & date >= td(1jan2005) & date < td(1jan2009)
replace women_np = 32.8 if country == 11 & date >= td(1jan2009) & date < td(1jan2013)
replace women_np = 36.98 if country == 11 & date >= td(1jan2013) & date < td(1jan2014)

*Each rule assigns the share of women in the national parliament to the rows
*of one country whose date lies in [lower bound, upper bound).

*Greece (country 12)
replace women_np = 6.33 if country == 12 & date < td(1jan2000)
replace women_np = 8.67 if country == 12 & date >= td(1jan2000) & date < td(1jan2004)
replace women_np = 13 if country == 12 & date >= td(1jan2004) & date < td(1jan2007)
replace women_np = 14.67 if country == 12 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 16 if country == 12 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 17.33 if country == 12 & date >= td(1jan2009) & date < td(1jan2012)
replace women_np = 21 if country == 12 & date >= td(1jan2012) & date < td(1jan2015)

*Hungary (country 13)
replace women_np = 8.29 if country == 13 & date < td(1jan2002)
replace women_np = 8.55 if country == 13 & date >= td(1jan2002) & date < td(1jan2006)
replace women_np = 10.36 if country == 13 & date >= td(1jan2006) & date < td(1jan2010)
replace women_np = 9.07 if country == 13 & date >= td(1jan2010) & date < td(1jan2014)

*Ireland (country 14)
replace women_np = 12.05 if country == 14 & date < td(1jan2002)
replace women_np = 13.25 if country == 14 & date >= td(1jan2002) & date < td(1jan2010)
replace women_np = 13.94 if country == 14 & date >= td(1jan2010) & date < td(1jan2011)
replace women_np = 15.06 if country == 14 & date >= td(1jan2011) & date < td(1jan2014)

*Italy (country 15)
replace women_np = 11.11 if country == 15 & date < td(1jan2001)
replace women_np = 9.84 if country == 15 & date >= td(1jan2001) & date < td(1jan2006)
replace women_np = 17.3 if country == 15 & date >= td(1jan2006) & date < td(1jan2008)
replace women_np = 21.27 if country == 15 & date >= td(1jan2008) & date < td(1jan2013)
replace women_np = 28.41 if country == 15 & date >= td(1jan2013) & date < td(1jan2014)

*Latvia (country 16)
replace women_np = 17 if country == 16 & date < td(1jan2002)
replace women_np = 18 if country == 16 & date >= td(1jan2002) & date < td(1jan2006)
replace women_np = 19 if country == 16 & date >= td(1jan2006) & date < td(1jan2007)
replace women_np = 20 if country == 16 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 19 if country == 16 & date >= td(1jan2008) & date < td(1jan2011)
replace women_np = 23 if country == 16 & date >= td(1jan2011) & date < td(1jan2012)
replace women_np = 21 if country == 16 & date >= td(1jan2012) & date < td(1jan2014)

*Lithuania (country 17)
replace women_np = 18.25 if country == 17 & date < td(1jan2000)
replace women_np = 10.64 if country == 17 & date >= td(1jan2000) & date < td(1jan2004)
replace women_np = 21.99 if country == 17 & date >= td(1jan2004) & date < td(1jan2006)
replace women_np = 24.82 if country == 17 & date >= td(1jan2006) & date < td(1jan2007)
replace women_np = 21.99 if country == 17 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 17.73 if country == 17 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 19.15 if country == 17 & date >= td(1jan2009) & date < td(1jan2011)
replace women_np = 17.73 if country == 17 & date >= td(1jan2011) & date < td(1jan2012)
replace women_np = 24.46 if country == 17 & date >= td(1jan2012) & date < td(1jan2013)
replace women_np = 24.11 if country == 17 & date >= td(1jan2013) & date < td(1jan2014)

*Each rule assigns the share of women in the national parliament to the rows
*of one country whose date lies in [lower bound, upper bound).

*Luxembourg (country 18)
replace women_np = 16.67 if country == 18 & date < td(1jan2004)
replace women_np = 20 if country == 18 & date >= td(1jan2004) & date < td(1jan2006)
replace women_np = 23.33 if country == 18 & date >= td(1jan2006) & date < td(1jan2007)
replace women_np = 20 if country == 18 & date >= td(1jan2007) & date < td(1jan2009)
replace women_np = 25 if country == 18 & date >= td(1jan2009) & date < td(1jan2010)
replace women_np = 20 if country == 18 & date >= td(1jan2010) & date < td(1jan2011)
replace women_np = 25 if country == 18 & date >= td(1jan2011) & date < td(1jan2013)
replace women_np = 23.33 if country == 18 & date >= td(1jan2013) & date < td(1jan2014)

*Malta (country 19)
replace women_np = 9.23 if country == 19 & date < td(1jan2008)
replace women_np = 8.7 if country == 19 & date >= td(1jan2008) & date < td(1jan2013)
replace women_np = 14.29 if country == 19 & date >= td(1jan2013) & date < td(1jan2014)

*Netherlands (country 20)
replace women_np = 36 if country == 20 & date < td(1jan2002)
replace women_np = 34 if country == 20 & date >= td(1jan2002) & date < td(1jan2003)
replace women_np = 36.67 if country == 20 & date >= td(1jan2003) & date < td(1jan2009)
replace women_np = 42 if country == 20 & date >= td(1jan2009) & date < td(1jan2010)
replace women_np = 40.67 if country == 20 & date >= td(1jan2010) & date < td(1jan2012)
replace women_np = 38.67 if country == 20 & date >= td(1jan2012) & date < td(1jan2014)

*Poland (country 21)
replace women_np = 13.04 if country == 21 & date < td(1jan2001)
replace women_np = 20.22 if country == 21 & date >= td(1jan2001) & date < td(1jan2005)
replace women_np = 20.43 if country == 21 & date >= td(1jan2005) & date < td(1jan2008)
replace women_np = 20.22 if country == 21 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 20.43 if country == 21 & date >= td(1jan2009) & date < td(1jan2010)
replace women_np = 20 if country == 21 & date >= td(1jan2010) & date < td(1jan2011)
replace women_np = 23.91 if country == 21 & date >= td(1jan2011) & date < td(1jan2014)

*Portugal (country 22)
replace women_np = 18.7 if country == 22 & date < td(1jan2002)
replace women_np = 19.13 if country == 22 & date >= td(1jan2002) & date < td(1jan2005)
replace women_np = 21.3 if country == 22 & date >= td(1jan2005) & date < td(1jan2007)
replace women_np = 28.26 if country == 22 & date >= td(1jan2007) & date < td(1jan2008)
replace women_np = 21.3 if country == 22 & date >= td(1jan2008) & date < td(1jan2009)
replace women_np = 27.83 if country == 22 & date >= td(1jan2009) & date < td(1jan2010)
replace women_np = 27.39 if country == 22 & date >= td(1jan2010) & date < td(1jan2011)
replace women_np = 28.7 if country == 22 & date >= td(1jan2011) & date < td(1jan2012)
replace women_np = 26.52 if country == 22 & date >= td(1jan2012) & date < td(1jan2014)

*Romania
replace women_np = 7.29 if date < td(1jan2000) & country == 23
replace women_np = 10.72 if date < td(1jan2004) & date >= td(1jan2000) & country == 23
replace women_np = 11.45 if date < td(1jan2006) & date >= td(1jan2004) & country == 23
replace women_np = 11.18 if date < td(1jan2007) & date >= td(1jan2006) & country == 23
replace women_np = 11.45 if date < td(1jan2008) & date >= td(1jan2007) & country == 23
replace women_np = 11.38 if date < td(1jan2012) & date >= td(1jan2008) & country == 23
replace women_np = 13.35 if date < td(1jan2014) & date >= td(1jan2012) & country == 23

*Slovakia
replace women_np = 12.67 if date < td(1jan2002) & country == 24
replace women_np = 19.33 if date < td(1jan2006) & date >= td(1jan2002) & country == 24
replace women_np = 20 if date < td(1jan2007) & date >= td(1jan2006) & country == 24
replace women_np = 19.33 if date < td(1jan2008) & date >= td(1jan2007) & country == 24
replace women_np = 20 if date < td(1jan2010) & date >= td(1jan2008) & country == 24
replace women_np = 15.33 if date < td(1jan2012) & date >= td(1jan2010) & country == 24
replace women_np = 18.67 if date < td(1jan2013) & date >= td(1jan2012) & country == 24
replace women_np = 16 if date < td(1jan2014) & date >= td(1jan2013) & country == 24

*Slovenia
replace women_np = 7.78 if date < td(1jan2000) & country == 25
replace women_np = 10 if date < td(1jan2001) & date >= td(1jan2000) & country == 25
replace women_np = 12.22 if date < td(1jan2008) & date >= td(1jan2001) & country == 25
replace women_np = 13.33 if date < td(1jan2010) & date >= td(1jan2008) & country == 25
replace women_np = 14.44 if date < td(1jan2011) & date >= td(1jan2010) & country == 25
replace women_np = 32.22 if date < td(1jan2014) & date >= td(1jan2011) & country == 25

*Spain
replace women_np = 21.43 if date < td(1jan2000) & country == 26
replace women_np = 28.29 if date < td(1jan2004) & date >= td(1jan2000) & country == 26
replace women_np = 36 if date < td(1jan2008) & date >= td(1jan2004) & country == 26
replace women_np = 36.29 if date < td(1jan2009) & date >= td(1jan2008) & country == 26
replace women_np = 36.57 if date < td(1jan2011) & date >= td(1jan2009) & country == 26
replace women_np = 36 if date < td(1jan2014) & date >= td(1jan2011) & country == 26

*Sweden
replace women_np = 42.69 if date < td(1jan2002) & country == 27
replace women_np = 45.27 if date < td(1jan2006) & date >= td(1jan2002) & country == 27
replace women_np = 47.28 if date < td(1jan2009) & date >= td(1jan2006) & country == 27
replace women_np = 46.42 if date < td(1jan2010) & date >= td(1jan2009) & country == 27
replace women_np = 44.99 if date < td(1jan2014) & date >= td(1jan2010) & country == 27

*United Kingdom
replace women_np = 18.21 if date < td(1jan2001) & country == 28
replace women_np = 17.91 if date < td(1jan2005) & date >= td(1jan2001) & country == 28
replace women_np = 19.81 if date < td(1jan2010) & date >= td(1jan2005) & country == 28
replace women_np = 22 if date < td(1jan2011) & date >= td(1jan2010) & country == 28
replace women_np = 22.92 if date < td(1jan2013) & date >= td(1jan2011) & country == 28
replace women_np = 22 if date < td(1jan2014) & date >= td(1jan2013) & country == 28

replace women_np =. if women_np == 0
br if women_np ==.


*(8) Quota laws
/* Data obtained from: https://www.idea.int/data-tools/data/gender-quotas/regions-overview#Europe */

* List every party name so the strings used in the rules below can be checked.
codebook partyname, tab(1000)

* Legislated quota indicator: starts at 0 ("No") for every observation.
generate leg_quota = 0
label variable leg_quota "Legislative Quota"
label define lg 0 "No" 1 "Yes"
label values leg_quota lg

* Voluntary party quota indicator: starts at 0 ("No") for every observation.
generate vol_party_quota = 0
label variable vol_party_quota "Voluntary Party Quota"
label define vpq 0 "No" 1 "Yes"
label values vol_party_quota vpq

* Quota size: starts as missing and is filled in by the per-country rules.
generate quotasize = .
label variable quotasize "Quotasize"

* Per-country quota coding.
*
* leg_quota       = 1 for every observation of a country with a legislated quota.
* vol_party_quota = 1 for observations whose partyname contains one of the
*                   listed party strings, within the given country only.
* quotasize       = quota size assigned to those parties.
*
* Stata evaluates & before |, so "country == X & a | b" means
* "(country == X & a) | b". Every OR-list of party names is therefore wrapped
* in parentheses, so that the country filter and the vol_party_quota guard
* apply to ALL listed names, not just the first. Without the parentheses,
* strings such as "Les Verts", "Parti Socialiste" or "Democratic Party" would
* also flag parties from other countries. The quotasize rules repeat the
* country filter so each rule is self-contained.
* strpos() returns 0 when the substring is absent, so it is used as a boolean.

*Austria
tab partyname if country == 1

replace vol_party_quota = 1 if country == 1 & ( ///
	strpos(partyname, "Die Grünen - Die Grüne Alternative") | ///
	strpos(partyname, "Österreichische Volkspartei") | ///
	strpos(partyname, "Österreichische Volkspartei - Liste Ursula Stenzel") | ///
	strpos(partyname, "Sozialdemokratische Partei Österreichs"))

replace quotasize = 50 if country == 1 & vol_party_quota == 1 & ///
	strpos(partyname, "Die Grünen - Die Grüne Alternative")
replace quotasize = 33.3 if country == 1 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Österreichische Volkspartei") | ///
	strpos(partyname, "Österreichische Volkspartei - Liste Ursula Stenzel"))
replace quotasize = 40 if country == 1 & vol_party_quota == 1 & ///
	strpos(partyname, "Sozialdemokratische Partei Österreichs")

*Belgium 
replace leg_quota = 1 if country == 2 

*Bulgaria
replace leg_quota = 1 if country == 3

*Croatia
tab partyname if country == 4

replace leg_quota = 1 if country == 4

*Cyprus
tab partyname if country == 5

replace vol_party_quota = 1 if country == 5 & ( ///
	strpos(partyname, "Kinima Sosialdimokraton") | ///
	strpos(partyname, "Movement for Social Democracy EDEK") | ///
	strpos(partyname, "Dimokratikos Synagermos") | ///
	strpos(partyname, "Dimocraticos Synagermos") | ///
	strpos(partyname, "Democratic Rally") | ///
	strpos(partyname, "Democratic Party"))

replace quotasize = 30 if country == 5 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Kinima Sosialdimokraton") | ///
	strpos(partyname, "Movement for Social Democracy EDEK"))
replace quotasize = 30 if country == 5 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Dimokratikos Synagermos") | ///
	strpos(partyname, "Dimocraticos Synagermos") | ///
	strpos(partyname, "Democratic Rally"))
replace quotasize = 20 if country == 5 & vol_party_quota == 1 & ///
	strpos(partyname, "Democratic Party")

*France 
replace leg_quota = 1 if country == 10 

tab partyname if country == 10

replace vol_party_quota = 1 if country == 10 & ( ///
	strpos(partyname, "Parti Socialiste") | ///
	strpos(partyname, "Parti socialiste") | ///
	strpos(partyname, "Union des Démocrates et Indépendant"))

replace quotasize = 50 if country == 10 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Parti Socialiste") | ///
	strpos(partyname, "Parti socialiste") | ///
	strpos(partyname, "Union des Démocrates et Indépendant"))

*Germany
tab partyname if country == 11

replace vol_party_quota = 1 if country == 11 & ( ///
	strpos(partyname, "Sozialdemokratische Partei Deutschl") | ///
	strpos(partyname, "Linkspartei.PDS") | ///
	strpos(partyname, "DIE LINKE.") | ///
	strpos(partyname, "Bündnis 90/Die Grünen") | ///
	strpos(partyname, "Christlich Demokratische Union Deut") | ///
	strpos(partyname, "Christlich-Soziale Union in Bayern"))

* NOTE(review): no quotasize is assigned for "Christlich-Soziale Union in
* Bayern" although it is flagged above - confirm this is intended.
replace quotasize = 30 if country == 11 & vol_party_quota == 1 & ///
	strpos(partyname, "Christlich Demokratische Union Deut")

replace quotasize = 40 if country == 11 & vol_party_quota == 1 & ///
	strpos(partyname, "Sozialdemokratische Partei Deutschl")

replace quotasize = 50 if country == 11 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Linkspartei.PDS") | ///
	strpos(partyname, "DIE LINKE.") | ///
	strpos(partyname, "Bündnis 90/Die Grünen"))

*Greece
replace leg_quota = 1 if country == 12

tab partyname if country == 12

replace vol_party_quota = 1 if country == 12 & ( ///
	strpos(partyname, "Panellinio Socialistiko Kinima") | ///
	strpos(partyname, "Panhellenic Socialist Movement"))

replace quotasize = 40 if country == 12 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Panellinio Socialistiko Kinima") | ///
	strpos(partyname, "Panhellenic Socialist Movement"))

*Hungary 
tab partyname if country == 13

replace vol_party_quota = 1 if country == 13 & strpos(partyname, "Magyar Szocialista Párt") 

replace quotasize = 20 if country == 13 & vol_party_quota == 1 & strpos(partyname, "Magyar Szocialista Párt") 

*Ireland
replace leg_quota = 1 if country == 14

*Italy
replace leg_quota = 1 if country == 15

tab partyname if country == 15

replace vol_party_quota = 1 if country == 15 & strpos(partyname, "Partito Democratico") 

replace quotasize = 50 if country == 15 & vol_party_quota == 1 & strpos(partyname, "Partito Democratico") 

*Lithuania
tab partyname if country == 17

replace vol_party_quota = 1 if country == 17 & ( ///
	strpos(partyname, "Lietuvos Socialdemokratų Partija") | ///
	strpos(partyname, "Lietuvos socialdemokratų partija"))

replace quotasize = 33 if country == 17 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Lietuvos Socialdemokratų Partija") | ///
	strpos(partyname, "Lietuvos socialdemokratų partija"))

*Luxembourg
tab partyname if country == 18

replace vol_party_quota = 1 if country == 18 & ( ///
	strpos(partyname, "Déi Gréng - Les Verts") | ///
	strpos(partyname, "Les Verts") | ///
	strpos(partyname, "Parti ouvrier socialiste luxembourg") | ///
	strpos(partyname, "Parti chrétien social luxembourg") | ///
	strpos(partyname, "Parti chrétien social"))

replace quotasize = 50 if country == 18 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Déi Gréng - Les Verts") | ///
	strpos(partyname, "Les Verts"))

replace quotasize = 33 if country == 18 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Parti ouvrier socialiste luxembourg") | ///
	strpos(partyname, "Parti chrétien social luxembourg") | ///
	strpos(partyname, "Parti chrétien social"))

*Malta
tab partyname if country == 19

replace vol_party_quota = 1 if country == 19 & strpos(partyname, "Partit Laburista") 

replace quotasize = 20 if country == 19 & vol_party_quota == 1 & strpos(partyname, "Partit Laburista") 

*Netherlands
tab partyname if country == 20

replace vol_party_quota = 1 if country == 20 & ( ///
	strpos(partyname, "Partij van de Arbeid") | ///
	strpos(partyname, "GroenLinks"))

* NOTE(review): GroenLinks is flagged above but receives no quotasize -
* confirm this is intended.
replace quotasize = 50 if country == 20 & vol_party_quota == 1 & strpos(partyname, "Partij van de Arbeid") 

*Poland
replace leg_quota = 1 if country == 21

*Portugal
replace leg_quota = 1 if country == 22

*Romania
tab partyname if country == 23

replace vol_party_quota = 1 if country == 23 & strpos(partyname, "Partidul Social Democrat") & date >= td(1jan2004)

replace quotasize = 30 if country == 23 & vol_party_quota == 1 & strpos(partyname, "Partidul Social Democrat") & date >= td(1jan2004)

*Slovakia
* NOTE(review): every Slovak observation is flagged regardless of party and
* no quotasize is set - confirm this is intended.
replace vol_party_quota = 1 if country == 24 

*Slovenia
replace leg_quota = 1 if country == 25

tab partyname if country == 25

replace vol_party_quota = 1 if country == 25 & ( ///
	strpos(partyname, "Združena lista socialnih demokratov") | ///
	strpos(partyname, "Socialni demokrati"))

replace quotasize = 40 if country == 25 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Združena lista socialnih demokratov") | ///
	strpos(partyname, "Socialni demokrati"))

*Spain
replace leg_quota = 1 if country == 26

tab partyname if country == 26

* Parties whose voluntary quota only applies from a given date onwards.
replace vol_party_quota = 1 if country == 26 & strpos(partyname, "Coalicion Canaria") & date >= td(1jan2000)

replace vol_party_quota = 1 if country == 26 & strpos(partyname, "Bloque Nacionalista Galego") & date >= td(1jan2002)

replace vol_party_quota = 1 if country == 26 & strpos(partyname, "Esquerra Republicana de Catalunya") & date >= td(1jan2004)

* Parties flagged for the whole observation period.
replace vol_party_quota = 1 if country == 26 & ( ///
	strpos(partyname, "Partit dels Socialistes de Catalunya") | ///
	strpos(partyname, "Izquierda Unida") | ///
	strpos(partyname, "Partido Socialista Obrero Español"))


replace quotasize = 40 if country == 26 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Izquierda Unida") | ///
	strpos(partyname, "Partido Socialista Obrero Español"))

replace quotasize = 40 if country == 26 & vol_party_quota == 1 & strpos(partyname, "Coalicion Canaria") & date >= td(1jan2000)

replace quotasize = 40 if country == 26 & vol_party_quota == 1 & strpos(partyname, "Bloque Nacionalista Galego") & date >= td(1jan2002)

replace quotasize = 40 if country == 26 & vol_party_quota == 1 & strpos(partyname, "Esquerra Republicana de Catalunya") & date >= td(1jan2004)

* Partit dels Socialistes de Catalunya: 30 before 2000, 40 from 2000 onwards.
replace quotasize = 30 if country == 26 & vol_party_quota == 1 & strpos(partyname, "Partit dels Socialistes de Catalunya") & date < td(1jan2000)

replace quotasize = 40 if country == 26 & vol_party_quota == 1 & strpos(partyname, "Partit dels Socialistes de Catalunya") & date >= td(1jan2000)

*Sweden 
tab partyname if country == 27

replace vol_party_quota = 1 if country == 27 & ( ///
	strpos(partyname, "Arbetarepartiet- Socialdemokraterna") | ///
	strpos(partyname, "Socialdemokratiska arbetarepartiet") | ///
	strpos(partyname, "Vänsterpartiet") | ///
	strpos(partyname, "Miljöpartiet de gröna") | ///
	strpos(partyname, "Miljöpartiet") | ///
	strpos(partyname, "Moderata Samlingspartiet") | ///
	strpos(partyname, "Moderata samlingspartiet"))

replace quotasize = 50 if country == 27 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Arbetarepartiet- Socialdemokraterna") | ///
	strpos(partyname, "Socialdemokratiska arbetarepartiet") | ///
	strpos(partyname, "Vänsterpartiet") | ///
	strpos(partyname, "Miljöpartiet de gröna") | ///
	strpos(partyname, "Miljöpartiet"))
* Moderata samlingspartiet only receives a quota size from 20 July 2009.
replace quotasize = 50 if country == 27 & vol_party_quota == 1 & date >= td(20jul2009) & ( ///
	strpos(partyname, "Moderata Samlingspartiet") | ///
	strpos(partyname, "Moderata samlingspartiet"))
			
*United Kingdom 
tab partyname if country == 28

replace vol_party_quota = 1 if country == 28 & ( ///
	strpos(partyname, "Labour Party") | ///
	strpos(partyname, "Liberal Democrat Party") | ///
	strpos(partyname, "Liberal Democrats Party"))

replace quotasize = 40 if country == 28 & vol_party_quota == 1 & ( ///
	strpos(partyname, "Liberal Democrats Party") | ///
	strpos(partyname, "Liberal Democrat Party"))
replace quotasize = 50 if country == 28 & vol_party_quota == 1 & strpos(partyname, "Labour Party") 


*(9) Year Dummies 
* Creates year_1999 ... year_2013 as 0/1 indicators derived from date, each
* with variable label "<year>" and its own value label y99 ... y13
* (0 "No", 1 "Yes").
*   - year_1999 covers every date before 1 Jan 2000 (no lower bound).
*   - year_2013 covers every date on or after 1 Jan 2013 (no upper bound).
*   - all other years cover [1 Jan of the year, 1 Jan of the next year).
* NOTE(review): Stata treats missing as larger than any number, so an
* observation with missing date satisfies "date >= ..." and is coded
* year_2013 = 1 - confirm that date is never missing at this point.
forvalues yr = 1999/2013 {
	* Two-digit suffix for the value-label name (99, 00, ..., 13).
	local sfx = substr("`yr'", 3, 2)
	* Daily-date values of 1 January of this year and of the next year.
	local lo = mdy(1, 1, `yr')
	local hi = mdy(1, 1, `yr' + 1)

	if `yr' == 1999 {
		generate year_`yr' = date < `hi'
	}
	else if `yr' == 2013 {
		generate year_`yr' = date >= `lo'
	}
	else {
		generate year_`yr' = date >= `lo' & date < `hi'
	}

	label variable year_`yr' "`yr'"
	label define y`sfx' 0 "No" 1 "Yes"
	label values year_`yr' y`sfx'
}
	
	
*(10) Generate empirical analysis variables  
* Builds three ratios per MEP (identified by name): the number of speeches
* with dictionary == 1 divided by the number of all speeches, computed over
* the whole dataset, per parliamentary term, and per calendar year.

generate year = year(date)
label variable year "Year"

* Helper columns: speeches is 1 on every row; dict is 1 only where
* dictionary == 1 and missing otherwise, so count() tallies flagged rows only.
generate speeches = 1
generate dict = cond(dictionary == 1, 1, .)

*SRWI overall
by name, sort: egen speech = count(speeches)
by name, sort: egen favspeech = count(dict)
label variable favspeech "SRWI Speeches made by MEP over career"
by name, sort: generate srwi = favspeech / speech
label variable srwi "Substantive Representation of Women over MEP career"

*SRWI per parliamentary term 
by name parlterm, sort: egen speech_term = count(speeches)
by name parlterm, sort: egen favspeech_term = count(dict)
label variable favspeech_term "SRWI Speeches made by MEP across parliamentary term"
by name parlterm, sort: generate srwi_term = favspeech_term / speech_term
label variable srwi_term "Substantive Representation of Women across parliamentary term"

*SRWI per year
by name year, sort: egen speech_year = count(speeches)
by name year, sort: egen favspeech_year = count(dict)
by name year, sort: generate srwi_year = favspeech_year / speech_year
label variable srwi_year "Substantive Representation of Women per year"

* Remove the helper columns and intermediate counts; favspeech,
* favspeech_term and the three ratios are kept.
drop speeches dict speech speech_term speech_year favspeech_year

save "$data/mep_srwi_0913.dta", replace

